* ---- Session setup ----
* Start from an empty session, switch off output paging, and close any log
* that is still open (cap suppresses the error when no log is open).
clear all
set more off
cap log close

* Add two project-local directories to the ado search path.
* NOTE(review): going by the folder names these presumably hold the reghdfe
* and binscatter packages - confirm.
adopath + "H:\Lavecchia_7086\to-transfer-jan-2022\reghdfe_files"
adopath + "H:\Lavecchia_7086\to-transfer-jan-2022\binscatter_files"


* Run the directory set-up do-file.
* NOTE(review): presumably this defines the dir_data, dir_log, dir_results and
* dir_do globals that every section below relies on - confirm.
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"

****************************************************************************
* This do-file uses subsamples
*	- capital gains, income, dividends and rrsp related variables
* 
* Regressions
* 
********************************************************************************

************* RUN INDIVIDUAL FIGURES/TABLES ON THEIR OWN ***********************
* Run switches: one local macro per figure. Each figure section further down
* is wrapped in an if-block on its switch, so a section runs only when its
* switch is set to a non-zero value (e.g. 1). All are 0 (off) by default.
* Note that the F1b switch is spelled with a lower-case f (figuref1b).
local figureE1=0
local figureE2=0
local figureE3=0
local figureE4=0
local figureE5=0
local figuref1b=0
local figureF1=0
local figureF2=0
local figureF3=0
local figureF4=0
local figureF5=0
local figureF6=0
local figureF7=0
local figureF8=0
local figureF9=0
local figureF10=0
local figureF11=0
local figureF12=0
local figureF13=0
local figureF14=0
local figureG1=0
local figureG1_Referee2=0
local figureG2=0
local figureH1=0
local figureH2=0
local figureH3=0
local figureH4=0

********************************************************************************

***************** FIGURE E1: RAW MEANS (UNCONDITIONAL LEVELS, ******************
***************** PERCENT POSITIVE AND CONDITIONAL LEVELS)    ******************

if (`figureE1') {

	* Open a dedicated log for this figure.
	capture log close
	log using "$dir_log\RESTAT_Figure_E1.log", replace

	* Load the 8299 file and stack the 0016 file underneath it.
	use "$dir_data\capital_income_final_8299.dta", clear
	append using "$dir_data\capital_income_final_0016.dta"
	keep lin__i year clkgxi wgt2_i

	* Indicator for clkgxi > 0, the amount for positive cases only, and a copy
	* of each series multiplied by wgt2_i.
	generate clkgxi_dummy     = (clkgxi > 0)
	generate clkgxi_dummy_wgt = clkgxi_dummy * wgt2_i
	generate clkgxi_wgt       = clkgxi * wgt2_i
	generate pos_clkgxi       = clkgxi if clkgxi > 0
	* pos_clkgxi is already missing wherever clkgxi > 0 fails, so no extra
	* if-condition is needed on the product.
	generate pos_clkgxi_wgt   = pos_clkgxi * wgt2_i

	keep wgt2_i year clkgxi clkgxi_wgt clkgxi_dummy clkgxi_dummy_wgt pos_clkgxi pos_clkgxi_wgt

	* Dominance test inputs, per year: largest value (max1), then the values
	* left after removing everything equal to the top three distinct values.
	foreach v in clkgxi clkgxi_wgt pos_clkgxi pos_clkgxi_wgt {
		by year, sort: egen max1_`v' = max(`v')
		generate temp = `v' if `v' != max1_`v'
		by year, sort: egen max2_`v' = max(temp)
		replace temp = cond(`v' != max1_`v' & `v' != max2_`v', `v', .)
		by year, sort: egen max3_`v' = max(temp)
		generate sum_`v' = `v' if `v' != max1_`v' & `v' != max2_`v' & `v' != max3_`v'
		drop temp max2_`v' max3_`v'
	}

	* Summed below to count the observations with clkgxi > 0.
	generate clkgxi_count = clkgxi_dummy

	* One row per year: counts, means, standard errors of the mean, the yearly
	* maximum, and yearly totals (the sum_ targets are totals of the raw series).
	collapse (sum)    clkgxi_count ///
	         (mean)   clkgxi clkgxi_dummy pos_clkgxi ///
	         (semean) se_clkgxi=clkgxi se_clkgxi_dummy=clkgxi_dummy se_pos_clkgxi=pos_clkgxi ///
	         (mean)   clkgxi_wgt clkgxi_dummy_wgt pos_clkgxi_wgt ///
	         (semean) se_clkgxi_wgt=clkgxi_wgt se_clkgxi_dummy_wgt=clkgxi_dummy_wgt se_pos_clkgxi_wgt=pos_clkgxi_wgt ///
	         (mean)   max1_clkgxi max1_pos_clkgxi ///
	         (sum)    sum_clkgxi=clkgxi sum_clkgxi_dummy=clkgxi_dummy sum_pos_clkgxi=pos_clkgxi, by(year)

	* Dominance ratio: largest value over the yearly total, flagged above 0.8.
	foreach v in clkgxi pos_clkgxi {
		generate domtest_`v' = (max1_`v'/sum_`v' > 0.8)
		generate rvalue_`v'  = max1_`v'/sum_`v'
	}

	* Rounded amounts: nearest 10 up to 1000, nearest 100 above 1000.
	foreach v in clkgxi_wgt pos_clkgxi_wgt {
		generate r`v' = cond(`v' <= 1000, round(`v', 10), round(`v', 100))
	}

	save "$dir_results\RESTAT_Figure_E1.dta", replace
	export excel using "$dir_results\RESTAT_Figure_E1.xlsx", firstrow(variables) replace

}


***************** FIGURE E2: RAW MEANS (BELOW AND ABOVE $1K   ******************
***************** OF REALIZED CGs (EXTENSIVE & INTENSIVE))    ******************

if (`figureE2') {

	* Open a dedicated log for this figure.
	capture log close
	log using "$dir_log\RESTAT_Figure_E2.log", replace

	* Load the 8299 file and stack the 0016 file underneath it.
	use "$dir_data\capital_income_final_8299.dta", clear
	append using "$dir_data\capital_income_final_0016.dta"
	keep lin__i year clkgxi wgt2_i

	* Overall positive indicator and amounts, plus wgt2_i-multiplied copies.
	generate clkgxi_dummy     = (clkgxi > 0)
	generate clkgxi_dummy_wgt = clkgxi_dummy * wgt2_i
	generate clkgxi_wgt       = clkgxi * wgt2_i
	generate pos_clkgxi       = clkgxi if clkgxi > 0
	generate pos_clkgxi_wgt   = pos_clkgxi * wgt2_i

	* Split the positive cases at 1000: (0, 1000] versus above 1000.
	generate clkgxi_dummy_lessthan1k = (clkgxi > 0 & clkgxi <= 1000)
	generate clkgxi_dummy_morethan1k = (clkgxi > 1000)
	generate pos_clkgxi_lessthan1k     = clkgxi if clkgxi_dummy_lessthan1k
	generate pos_clkgxi_lessthan1k_wgt = pos_clkgxi_lessthan1k * wgt2_i
	generate pos_clkgxi_morethan1k     = clkgxi if clkgxi_dummy_morethan1k
	generate pos_clkgxi_morethan1k_wgt = pos_clkgxi_morethan1k * wgt2_i
	generate clkgxi_dummy_lessthan1k_wgt = clkgxi_dummy_lessthan1k * wgt2_i
	generate clkgxi_dummy_morethan1k_wgt = clkgxi_dummy_morethan1k * wgt2_i

	keep wgt2_i year pos_clkgxi_lessthan1k pos_clkgxi_morethan1k ///
	     clkgxi_dummy_lessthan1k clkgxi_dummy_lessthan1k_wgt ///
	     pos_clkgxi_morethan1k_wgt pos_clkgxi_lessthan1k_wgt ///
	     clkgxi_dummy_morethan1k clkgxi_dummy_morethan1k_wgt clkgxi_dummy

	* Dominance test inputs, per year: largest value (max1), then the values
	* left after removing everything equal to the top three distinct values.
	foreach v in pos_clkgxi_lessthan1k pos_clkgxi_morethan1k pos_clkgxi_morethan1k_wgt pos_clkgxi_lessthan1k_wgt {
		by year, sort: egen max1_`v' = max(`v')
		generate temp = `v' if `v' != max1_`v'
		by year, sort: egen max2_`v' = max(temp)
		replace temp = cond(`v' != max1_`v' & `v' != max2_`v', `v', .)
		by year, sort: egen max3_`v' = max(temp)
		generate sum_`v' = `v' if `v' != max1_`v' & `v' != max2_`v' & `v' != max3_`v'
		drop temp max2_`v' max3_`v'
	}

	* Summed below to count the observations with clkgxi > 0.
	generate clkgxi_count = clkgxi_dummy

	* One row per year: counts, means, standard errors of the mean, the yearly
	* maximum, and yearly totals (the sum_ targets are totals of the raw series).
	collapse (sum)    clkgxi_count ///
	         (mean)   pos_clkgxi_lessthan1k pos_clkgxi_morethan1k clkgxi_dummy_lessthan1k clkgxi_dummy_morethan1k ///
	         (semean) se_pos_clkgxi_lessthan1k=pos_clkgxi_lessthan1k se_pos_clkgxi_morethan1k=pos_clkgxi_morethan1k ///
	         (mean)   pos_clkgxi_morethan1k_wgt pos_clkgxi_lessthan1k_wgt clkgxi_dummy_lessthan1k_wgt clkgxi_dummy_morethan1k_wgt ///
	         (semean) se_pos_clkgxi_morethan1k_wgt=pos_clkgxi_morethan1k_wgt se_pos_clkgxi_lessthan1k_wgt=pos_clkgxi_lessthan1k_wgt ///
	                  se_clkgxi_dummy_lessthan1k_wgt=clkgxi_dummy_lessthan1k_wgt se_clkgxi_dummy_morethan1k_wgt=clkgxi_dummy_morethan1k_wgt ///
	         (mean)   max1_pos_clkgxi_lessthan1k max1_pos_clkgxi_morethan1k ///
	         (sum)    sum_pos_clkgxi_lessthan1k=pos_clkgxi_lessthan1k sum_pos_clkgxi_morethan1k=pos_clkgxi_morethan1k, by(year)

	* Dominance ratio: largest value over the yearly total, flagged above 0.8.
	foreach v in pos_clkgxi_lessthan1k pos_clkgxi_morethan1k {
		generate domtest_`v' = (max1_`v'/sum_`v' > 0.8)
		generate rvalue_`v'  = max1_`v'/sum_`v'
	}

	* Rounded amounts: nearest 10 up to 1000, nearest 100 above 1000.
	foreach v in pos_clkgxi_morethan1k_wgt pos_clkgxi_lessthan1k_wgt {
		generate r`v' = cond(`v' <= 1000, round(`v', 10), round(`v', 100))
	}

	save "$dir_results\RESTAT_Figure_E2.dta", replace
	export excel using "$dir_results\RESTAT_Figure_E2.xlsx", firstrow(variables) replace

}


 
***************** FIGURE E3: CG REALIZATION HISTORY BY AGE    ******************
***************** (a,d & g is age<=40); (b,e & h is 40<age<=60); ***************
***************** (c,f & i is age>60) (EXTENSIVE & INTENSIVE)  ******************

if (`figureE3') {

cap log close
log using "$dir_log\RESTAT_Figure_E3.log", replace	

* Figure E3: how often filers reported positive clkgxi in the current and
* previous years, by year and age group. Two result sets are written, one per
* value of the loop macro sample:
*   weighted - collapse weighted by wgt2_i, with rounded counts and amounts
*   true     - unweighted collapse plus dominance-test variables

****************************************************************************
* Build the combined 8299 + 0016 extract once. It does not depend on the
* sample type, so it is created before the loop and simply re-read on each
* pass (the collapse at the end of a pass destroys the data in memory).
clear
use "$dir_data\capital_income_final_8299.dta"
keep lin__i year clkgxi  wgt2_i tirc_i txi__i cpi_to2016
append using  "$dir_data\capital_income_final_0016.dta"
keep lin__i year clkgxi  wgt2_i tirc_i txi__i cpi_to2016
save "$dir_data\capital_income_final_smallcombined.dta", replace

 foreach sample in weighted true {
 
****************************************************************************
clear
use "$dir_data\capital_income_final_smallcombined.dta"
drop tirc_i txi__i 

* merge with age: yob__i is brought in from the permanent demographic file
* NOTE(review): this is the old-style merge syntax, which needs the using
* file to be sorted by lin__i - confirm that is guaranteed upstream.
sort lin__i 
merge lin__i using "$dir_data/demographic_permanent.dta"
keep if _merge!=2
drop _merge
gen age=year-yob__i

* Age groups: 1 = age<=40, 2 = 40<age<=60, 3 = age>60.
* NOTE(review): a missing age satisfies age>60 in Stata, so any filer without
* yob__i (including master-only rows kept above) lands in group 3 - confirm
* whether such rows exist and whether that is intended.
*gen age94=age-(year-1994)
gen age_group=1 if age<=40
replace age_group=2 if age>40 & age<=60
replace age_group=3 if age>60
drop  age

sort  lin__i year
xtset lin__i year

* calculate lagged values (1 to 6 years back, via the panel lag operator)
forvalues i=1(1)6{
gen clkgxi_m`i'=l`i'.clkgxi
*gen pos_clkgxi_m`i'=(clkgxi_m`i'>0 & clkgxi_m`i'!=.)
}


gen total=1

****** Indicators for reported positive net capital gains in the current year
****** and/or in the previous 2, 4 or 6 years.
* some_pastK   : positive in the current year OR in any of the previous K-1 years
* some1_pastK  : positive in the current year AND in any of the previous K years
* *_1k variants: same, but require clkgxi*cpi_to2016 > 2338.8
* NOTE(review): the lags are multiplied by the current-year cpi_to2016, not
* the deflator of the lagged year; 2338.8 is presumably a real-dollar cut-off
* - confirm both.
* NOTE(review): the current-year tests are not guarded against missing
* clkgxi (missing counts as >0 in Stata), unlike the lagged tests.
gen some1=(clkgxi>0)
gen some1_1k=(clkgxi*cpi_to2016>2338.8)

gen some_past3=(clkgxi>0 | (clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.) )
lab var some_past3 "any cap gains this or past 2 years"
gen some_past5=(clkgxi>0 | (clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.) | (clkgxi_m3>0 & clkgxi_m3!=.) | (clkgxi_m4>0 & clkgxi_m4!=.))
lab var some_past5 "any cap gains this or past 4 years"
gen some_past7=(clkgxi>0 | (clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.) | (clkgxi_m3>0 & clkgxi_m3!=.) | (clkgxi_m4>0 & clkgxi_m4!=.) | (clkgxi_m5>0 & clkgxi_m5!=.) | (clkgxi_m6>0 & clkgxi_m6!=.))
lab var some_past7 "any cap gains this or past 6 years"

gen some1_past2=(clkgxi>0 & ((clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.)) )
lab var some1_past2 "cap gains this year and in past 2 years"
gen some1_past4=(clkgxi>0 & ((clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.) | (clkgxi_m3>0 & clkgxi_m3!=.) | (clkgxi_m4>0 & clkgxi_m4!=.)))
lab var some1_past4 "cap gains this year and in past 4 years"
gen some1_past6=(clkgxi>0 & ((clkgxi_m1>0 & clkgxi_m1!=.) | (clkgxi_m2>0 & clkgxi_m2!=.) | (clkgxi_m3>0 & clkgxi_m3!=.) | (clkgxi_m4>0 & clkgxi_m4!=.) | (clkgxi_m5>0 & clkgxi_m5!=.)  | (clkgxi_m6>0 & clkgxi_m6!=.)))
lab var some1_past6 "cap gains this year and in past 6 years"

gen some_past3_1k=(clkgxi*cpi_to2016>2338.8 | (clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.) )
lab var some_past3_1k "any cap gains above cut-off this or past 2 years"
gen some_past5_1k=(clkgxi*cpi_to2016>2338.8 | (clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.) | (clkgxi_m3*cpi_to2016>2338.8 & clkgxi_m3!=.) | (clkgxi_m4*cpi_to2016>2338.8 & clkgxi_m4!=.))
lab var some_past5_1k "any cap gains above cut-off this or past 4 years"
gen some_past7_1k=(clkgxi*cpi_to2016>2338.8 | (clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.) | (clkgxi_m3*cpi_to2016>2338.8 & clkgxi_m3!=.) | (clkgxi_m4*cpi_to2016>2338.8 & clkgxi_m4!=.) | (clkgxi_m5*cpi_to2016>2338.8 & clkgxi_m5!=.) | (clkgxi_m6*cpi_to2016>2338.8 & clkgxi_m6!=.))
lab var some_past7_1k "any cap gains above cut-off this or past 6 years"

gen some1_past2_1k=(clkgxi*cpi_to2016>2338.8 & ((clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.)) )
lab var some1_past2_1k "cap gains above cut-off this year and in past 2 years"
gen some1_past4_1k=(clkgxi*cpi_to2016>2338.8 & ((clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.) | (clkgxi_m3*cpi_to2016>2338.8 & clkgxi_m3!=.) | (clkgxi_m4*cpi_to2016>2338.8 & clkgxi_m4!=.)))
lab var some1_past4_1k "cap gains above cut-off this year and in past 4 years"
gen some1_past6_1k=(clkgxi*cpi_to2016>2338.8 & ((clkgxi_m1*cpi_to2016>2338.8 & clkgxi_m1!=.) | (clkgxi_m2*cpi_to2016>2338.8 & clkgxi_m2!=.) | (clkgxi_m3*cpi_to2016>2338.8 & clkgxi_m3!=.) | (clkgxi_m4*cpi_to2016>2338.8 & clkgxi_m4!=.) | (clkgxi_m5*cpi_to2016>2338.8 & clkgxi_m5!=.)  | (clkgxi_m6*cpi_to2016>2338.8 & clkgxi_m6!=.)))
lab var some1_past6_1k "cap gains above cut-off this year and in past 6 years"

* Current-year amount (times cpi_to2016), split by whether the filer also had
* positive clkgxi in any of the previous 6 years (some6) or not (none6).
gen clkgxi_some6=clkgxi*cpi_to2016  if  some1_past6==1
gen none1_past6=(clkgxi>0 & some1_past6==0)
gen clkgxi_none6=clkgxi*cpi_to2016  if clkgxi>0 & some1_past6==0

keep year wgt2_i total some* age_group clkgxi* none1_past6

if "`sample'"=="true"{

local by_what year age_group
*  DOMINANCE TEST
* All three maxima must be taken within the same year x age_group cell that
* the collapse below uses. max2 was previously computed by year only, which
* let the largest value of another age group stand in for the second-largest
* value of a cell.
foreach var in clkgxi_some6 clkgxi_none6  {
	bysort `by_what': egen max1_`var'=max(`var')
	gen temp=`var' if `var'!=max1_`var'
	bysort `by_what': egen max2_`var'=max(temp)
	replace temp=.
	replace temp=`var' if `var'!=max1_`var' & `var'!=max2_`var'
	bysort `by_what': egen max3_`var'=max(temp)
	gen sum_`var'=`var' if `var'!=max1_`var' & `var'!=max2_`var' & `var'!=max3_`var'
	drop temp max2_`var' max3_`var'
}

* Unweighted cell counts and means, plus the cell maximum and the cell total
* of the values left after removing the top three.
collapse  (sum)  total some1 some1_1k some_past3 some_past5 some_past7 some_past3_1k some_past5_1k some_past7_1k none1_past6 ///
		  (sum)    		some1_past2 some1_past4 some1_past6 some1_past2_1k some1_past4_1k some1_past6_1k ///
		  (mean) 	clkgxi_some6 clkgxi_none6 ///
		  (mean) max1_clkgxi_some6 max1_clkgxi_none6  ///
		  (sum)  sum_clkgxi_some6 sum_clkgxi_none6  ///
,   by(year age_group)

* Dominance ratio, flagged when above 0.8.
foreach var in clkgxi_some6 clkgxi_none6 {
gen domtest_`var'=(max1_`var'/sum_`var'>0.8)
gen rvalue_`var'=max1_`var'/sum_`var'
}

}

if "`sample'"=="weighted"{

* Same cell statistics, weighted by wgt2_i.
collapse  (sum)  total some1 some1_1k some_past3 some_past5 some_past7 some_past3_1k some_past5_1k some_past7_1k none1_past6 ///
		  (sum)    		some1_past2 some1_past4 some1_past6 some1_past2_1k some1_past4_1k some1_past6_1k ///
		  (mean) 	clkgxi_some6 clkgxi_none6	///
 [w=wgt2_i] ,   by(year age_group)
 
 
 * ROUND COUNTS &  inflate by 5 (multiply by 5, then round to the nearest 5)
foreach var in total some1 some1_1k some_past3 some_past5 some_past7 some_past3_1k some_past5_1k some_past7_1k none1_past6 some1_past2 some1_past4 some1_past6 some1_past2_1k some1_past4_1k some1_past6_1k  {
gen r`var'=round(`var'*5,5)
}

* ROUND AMOUNTS (nearest 10 up to 1000, nearest 100 above 1000)
foreach var in  clkgxi_some6 clkgxi_none6  {
	gen r`var'=.
	replace r`var'=round(`var',10) if `var'<=1000
	replace r`var'=round(`var',100) if `var'>1000
}

}

save "$dir_results\RESTAT_Figure_E3_participation_freq_graphs_`sample'.dta", replace
export excel using "$dir_results\RESTAT_Figure_E3_participation_freq_graphs_`sample'.xlsx", firstrow(variables) replace	


}
		
}


***************** FIGURE E4: UNCONDITIONAL AND CONDITIONAL AVERAGE *************
***************** REALIZED CGs BY MARITAL STATUS                   *************

if (`figureE4') {

	* Open a dedicated log for this figure.
	capture log close
	log using "$dir_log\RESTAT_Figure_E4.log", replace

	* For each file period, attach fcmp_i from the matching demographic file,
	* keep matched person-years only, and park the result in a temp file.
	foreach period in 8299 0016 {
		use "$dir_data\capital_income_final_`period'.dta", clear
		keep lin__i year clkgxi wgt2_i
		merge 1:1 lin__i year using "$dir_data/demographic_`period'.dta", keepusing(fcmp_i)
		keep if _merge == 3
		save "$dir_data\temp_merged_`period'.dta", replace
	}

	* The 0016 merge is still in memory; stack the 8299 merge underneath it.
	append using "$dir_data\temp_merged_8299.dta"
	keep lin__i year clkgxi wgt2_i fcmp_i
	sort lin__i year

	* married = 1 when fcmp_i takes one of the listed codes, 0 otherwise.
	generate married = inlist(fcmp_i, 1, 11, 2, 12, 5, 15)

	* Positive indicator, positive-only amount, and wgt2_i-multiplied copies.
	generate clkgxi_dummy     = (clkgxi > 0)
	generate clkgxi_dummy_wgt = clkgxi_dummy * wgt2_i
	generate clkgxi_wgt       = clkgxi * wgt2_i
	generate pos_clkgxi       = clkgxi if clkgxi > 0
	* pos_clkgxi is already missing wherever clkgxi > 0 fails, so the product
	* needs no further if-condition.
	generate pos_clkgxi_wgt   = pos_clkgxi * wgt2_i

	keep wgt2_i year married clkgxi clkgxi_wgt clkgxi_dummy clkgxi_dummy_wgt pos_clkgxi pos_clkgxi_wgt

	* Dominance test inputs within each married x year cell: largest value
	* (max1), then the values left after removing the top three distinct values.
	foreach v in clkgxi clkgxi_wgt pos_clkgxi pos_clkgxi_wgt {
		by married year, sort: egen max1_`v' = max(`v')
		generate temp = `v' if `v' != max1_`v'
		by married year, sort: egen max2_`v' = max(temp)
		replace temp = cond(`v' != max1_`v' & `v' != max2_`v', `v', .)
		by married year, sort: egen max3_`v' = max(temp)
		generate sum_`v' = `v' if `v' != max1_`v' & `v' != max2_`v' & `v' != max3_`v'
		drop temp max2_`v' max3_`v'
	}

	* Summed below to count the observations with clkgxi > 0.
	generate clkgxi_count = clkgxi_dummy

	* One row per married x year cell: counts, means, standard errors of the
	* mean, the cell maximum, and cell totals of the raw series.
	collapse (sum)    clkgxi_count ///
	         (mean)   clkgxi clkgxi_dummy pos_clkgxi ///
	         (semean) se_clkgxi=clkgxi se_clkgxi_dummy=clkgxi_dummy se_pos_clkgxi=pos_clkgxi ///
	         (mean)   clkgxi_wgt clkgxi_dummy_wgt pos_clkgxi_wgt ///
	         (semean) se_clkgxi_wgt=clkgxi_wgt se_clkgxi_dummy_wgt=clkgxi_dummy_wgt se_pos_clkgxi_wgt=pos_clkgxi_wgt ///
	         (mean)   max1_clkgxi max1_pos_clkgxi ///
	         (sum)    sum_clkgxi=clkgxi sum_pos_clkgxi=pos_clkgxi, by(married year)

	* Dominance ratio: largest value over the cell total, flagged above 0.8.
	foreach v in clkgxi pos_clkgxi {
		generate domtest_`v' = (max1_`v'/sum_`v' > 0.8)
		generate rvalue_`v'  = max1_`v'/sum_`v'
	}

	* Rounded amounts: nearest 10 up to 1000, nearest 100 above 1000.
	foreach v in clkgxi_wgt pos_clkgxi_wgt {
		generate r`v' = cond(`v' <= 1000, round(`v', 10), round(`v', 100))
	}

	save "$dir_results\RESTAT_Figure_E4.dta", replace
	export excel using "$dir_results\RESTAT_Figure_E4.xlsx", firstrow(variables) replace

	* Remove the two intermediate merge files.
	erase "$dir_data\temp_merged_8299.dta"
	erase "$dir_data\temp_merged_0016.dta"

}



***************** FIGURE E5: RAW MEANS (UNCONDITIONAL IHS) *********************

if (`figureE5') {

	* Open a dedicated log for this figure.
	capture log close
	log using "$dir_log\RESTAT_Figure_E5.log", replace

	* Start from the saved Figure 5 results and keep only the series that
	* are exported for panel E5a.
	use "$dir_results\RESTAT_Figure_5.dta", clear
	keep cg_8593 year clkgxi log_clkgxi ihs_clkgxi_wgt

	save "$dir_results\RESTAT_Figure_E5a.dta", replace
	export excel using "$dir_results\RESTAT_Figure_E5a.xlsx", firstrow(variables) replace

}



************** FIGURE F1: INTENSIVE MARGIN RESULTS IN LEVELS *******************

if (`figureF1') {

* Figure F1: event-study regressions of clkgxi in levels on the intensive
* margin sample (clkgxi > 0), run separately for groups 3, 4 and 5 of
* cg_8593, each against group 2. Coefficients are stored by the external
* do-file DiffnDiff_Save.do and exported to Excel.

cap log close
cap log using "$dir_log\RESTAT_Figure_F1.log", replace

* INTENSIVE MARGIN LEVELS REGRESSIONS
* Restrict to 1990-1999 and drop observations flagged flag_losses == 1.
clear
use "$dir_data\data_sample_DD_intensive.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

* keep only observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild log_clkgxi on the restricted sample (not used by the levels
* regression below).
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

* Expand T_event into indicator variables named _T_event_#, with the category
* T_event == -1 as the omitted one.
char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns.
* NOTE(review): presumably filled row by row by DiffnDiff_Save.do - confirm.
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

* NOTE(review): global i looks like the result-row counter used by
* DiffnDiff_Save.do - confirm.
global i=1
foreach g in `groups'   {
   
* 1993 means of clkgxi, mtr and mtr_zero for group g, and the number of
* observations in group g, passed on through globals.
global cg_8593=`g'
qui sum clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* Event-time indicators 1-10 except 4, plus group and year effects.
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=1
* Group g versus group 2, standard errors clustered on lin__i.
reg clkgxi ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop the observations of group g that sit at row 1500 or later.
* NOTE(review): presumably this shrinks the data once group g is done while
* leaving the first rows, where results appear to be stored, intact - confirm.
drop if cg_8593==`g' & _n>=1500
}


* Keep only the result columns and save them.
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F1.dta", replace

* LEVELS graphs:
* Reshape the coefficients to one row per year. Position 4 is the indicator
* left out of the regression, so b4 is set to 0 and its missing standard
* error to 0; after year = 1989 + position it corresponds to 1993.
clear
use "$dir_results\RESTAT_Figure_F1.dta"
replace b4 = 0
drop if cg_8593_2==.
reshape long b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
replace year = 1989 + year

* Sample sizes rounded to the nearest 5 for export.
gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F1.xlsx", firstrow(variables) replace


}


////////////////////////////////////////////////////////////////////////////////////
///// ***** COMMENT R2 Scale CG in Levels by Total Income From 1990-1993 ***** /////
////////////////////////////////////////////////////////////////////////////////////


if (`figuref1b') {
	
* Figure F1b: event-study regressions with clkgxi scaled by each filer's
* average total income (tirc_i) over 1990-1993, run separately for groups
* 3, 4 and 5 of cg_8593, each against group 2. Results are stored by the
* external do-file DiffnDiff_Save_clkgxi_tirc_i_ratio.do.

cap log close
cap log using "$dir_log\RESTAT_Figure_F1b.log", replace

* Calculate Average Total Income from 1990-1993 (one row per filer)
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if flag_losses == 1
drop if year < 1990
drop if year > 1993
keep lin__i year tirc_i

by lin__i, sort : egen float avg_tirc_i_1990_1993 = mean(tirc_i)
collapse (mean) avg_tirc_i_1990_1993, by(lin__i)
save "$dir_data\temp_tirc_i_1990_1993.dta", replace

* RATIO REGRESSIONS: 1990-1999 sample with the filer-level average attached
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if flag_losses == 1
drop if year < 1990
drop if year > 1999

merge m:1 lin__i using "$dir_data\temp_tirc_i_1990_1993.dta"
tab _merge
keep if _merge == 3 /* drops 911 or 0.06% of tax filers that only appear in the sample after 1994 (after previously being there in 1985-1989 */

* Flags for non-positive current-year income and non-positive 1990-1993
* average income.
gen flag_neg_tirc_i = (tirc_i <= 0)
gen flag_neg_avg_tirc_i_1990_1993 = (avg_tirc_i_1990_1993 <= 0)

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* clkgxi relative to current-year income and to the 1990-1993 average.
gen clkgxi_tirc_i = clkgxi / tirc_i

gen clkgxi_tirc_i_1990_1993 = clkgxi / avg_tirc_i_1990_1993


table year cg_8593 if clkgxi >= 0 & cg_8593 >= 2 & flag_neg_avg_tirc_i_1990_1993 == 0, stat(m clkgxi_tirc_i_1990_1993 )

* Rebuild the treatment indicator and event time: groups 3 and above are
* treated, event time is year - 1994 for them and -1 for everyone else.
drop treat_cg_8593 T_event
gen treat_cg_8593 = (cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Expand T_event into indicator variables named _T_event_#, with the category
* T_event == -1 as the omitted one.
char T_event[omit] -1
xi i.T_event, pref(_)


* set controls
set matsize 10000
local demographics age age2 age3  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns.
* NOTE(review): presumably filled row by row by the external save do-file -
* confirm.
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi_tirc_i_1990_1993=.

local groups "3 4 5"

* NOTE(review): global i looks like the result-row counter used by the
* external save do-file - confirm.
global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 baseline mean of the ratio for group g. The sample filter is the
* non-positive AVERAGE income flag, matching the table above and the ratio
* regressions below; the current-year flag used previously kept filers with
* a negative 1990-1993 average (negative ratios) in the baseline mean.
qui sum clkgxi_tirc_i_1990_1993 if cg_8593==`g' & year==1993 & flag_neg_avg_tirc_i_1990_1993 == 0
global ave_clkgxi_tirc_i_1990_1993 =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* Event-time indicators 1-10 except 4, plus group and year effects.
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
* version 1: non-positive average income flag as outcome, full sample
global version=1
reg flag_neg_avg_tirc_i_1990_1993  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save_clkgxi_tirc_i_ratio.do"
* version 2: ratio as outcome, filers with positive average income
global version=2
reg clkgxi_tirc_i_1990_1993  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2) & flag_neg_avg_tirc_i_1990_1993 == 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save_clkgxi_tirc_i_ratio.do"
* version 3: as version 1, restricted to clkgxi > 0
global version=3
reg flag_neg_avg_tirc_i_1990_1993  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2) & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save_clkgxi_tirc_i_ratio.do"
* version 4: as version 2, restricted to clkgxi > 0
global version=4
reg clkgxi_tirc_i_1990_1993  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2) & flag_neg_avg_tirc_i_1990_1993 == 0 & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save_clkgxi_tirc_i_ratio.do"

* Drop the observations of group g that sit at row 1500 or later.
* NOTE(review): presumably this shrinks the data once group g is done while
* leaving the first rows, where results appear to be stored, intact - confirm.
drop if cg_8593==`g' & _n>=1500

}


* Keep only the result columns and save them.
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi_tirc_i_1990_1993
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi_tirc_i_1990_1993 version b* se*
save "$dir_results\RESTAT_Figure_F1b.dta", replace

* Unconditional graphs:
* Reshape the coefficients to one row per year. Position 4 is the indicator
* left out of the regressions, so b4 is set to 0 and its missing standard
* error to 0; after year = 1989 + position it corresponds to 1993.
clear
use "$dir_results/RESTAT_Figure_F1b.dta"
replace b4 = 0
drop if cg_8593_2==.
reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi_tirc_i_1990_1993 cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
replace year = 1989 + year

* Sample sizes rounded to the nearest 5 for export.
gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F1b.xlsx", firstrow(variables) replace

}




************** FIGURE F2: ROBUSTNESS CHECK WITH INDIVIDUAL FE ******************

if (`figureF2') {
	
* Figure F2: the event-study regressions re-run with individual fixed effects
* (areg absorbing lin__i), first on the intensive margin sample (F2b, outcome
* log_clkgxi) and then on the unconditional sample (F2a, outcome pos_clkgxi).
* Each is run separately for groups 3, 4 and 5 of cg_8593 against group 2.

************* FIGURE F2B: INTENSIVE MARGIN WITH INDIVIDUAL FE ******************	

cap log close
cap log using "$dir_log\RESTAT_Figure_F2b.log", replace

* LOGS REGRESSIONS
* Restrict to 1990-1999 and drop observations flagged flag_losses == 1.
clear
use "$dir_data\data_sample_DD_intensive.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

* keep only observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild log_clkgxi on the restricted sample.
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

* Expand T_event into indicator variables named _T_event_#, with the category
* T_event == -1 as the omitted one.
char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns.
* NOTE(review): presumably filled row by row by DiffnDiff_Save.do - confirm.
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

* NOTE(review): global i looks like the result-row counter used by
* DiffnDiff_Save.do - confirm.
global i=1
foreach g in `groups'   {
   
* 1993 means of log_clkgxi, mtr and mtr_zero for group g, and the number of
* observations in group g, passed on through globals.
global cg_8593=`g'
qui sum log_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* Event-time indicators 1-10 except 4, plus group and year effects.
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=4
* Group g versus group 2, absorbing lin__i, standard errors clustered on lin__i.
areg log_clkgxi ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i) absorb(lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop the observations of group g that sit at row 1500 or later.
* NOTE(review): presumably this shrinks the data once group g is done while
* leaving the first rows, where results appear to be stored, intact - confirm.
drop if cg_8593==`g' & _n>=1500
}


* Keep only the result columns and save them.
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F2b.dta", replace

* LOGS graphs:
* Reshape the coefficients to one row per year. Position 4 is the indicator
* left out of the regression, so b4 is set to 0 and its missing standard
* error to 0; after year = 1989 + position it corresponds to 1993.
clear
use "$dir_results\RESTAT_Figure_F2b.dta"
replace b4 = 0
drop if cg_8593_2==.
reshape long b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
replace year = 1989 + year

* Sample sizes rounded to the nearest 5 for export.
gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F2b.xlsx", firstrow(variables) replace


********************************************************************************
********* FIGURE F2A: EXTENSIVE MARGIN RESULTS WITH INDIVIDUAL FE **************

cap log close
cap log using "$dir_log\RESTAT_Figure_F2a.log", replace

* EXTENSIVE MARGIN REGRESSIONS (outcome is pos_clkgxi, not a log)
* Restrict to 1990-1999 and drop observations flagged flag_losses == 1.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

* Rebuild log_clkgxi (not used by the regression below).
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

* Expand T_event into indicator variables named _T_event_#, with the category
* T_event == -1 as the omitted one.
char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns.
* NOTE(review): presumably filled row by row by DiffnDiff_Save.do - confirm.
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

global i=1
foreach g in `groups'   {
   
* 1993 means of pos_clkgxi, mtr and mtr_zero for group g, and the number of
* observations in group g, passed on through globals.
* NOTE(review): the baseline mean goes into global pos_clkgxi here, whereas
* the intensive-margin part sets global ave_clkgxi; the ave_clkgxi column
* created above is not kept below. Confirm which global DiffnDiff_Save.do
* reads for this part.
global cg_8593=`g'
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global pos_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* Event-time indicators 1-10 except 4, plus group and year effects.
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=4
* Group g versus group 2, absorbing lin__i, standard errors clustered on lin__i.
areg pos_clkgxi ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i) absorb(lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop the observations of group g that sit at row 1500 or later.
* NOTE(review): presumably this shrinks the data once group g is done while
* leaving the first rows, where results appear to be stored, intact - confirm.
drop if cg_8593==`g' & _n>=1500
}


* Keep only the result columns and save them.
* NOTE(review): pos_clkgxi kept here is the outcome variable from the data
* unless DiffnDiff_Save.do overwrites it in the result rows - confirm, since
* it is also used as an identifier in the reshape below.
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr pos_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr pos_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F2a.dta", replace

* Extensive Margin graphs:
* Reshape the coefficients to one row per year. Position 4 is the indicator
* left out of the regression, so b4 is set to 0 and its missing standard
* error to 0; after year = 1989 + position it corresponds to 1993.
clear
use "$dir_results\RESTAT_Figure_F2a.dta"
replace b4 = 0
drop if cg_8593_2==.
reshape long b se , i(version N ave_mtr_zero ave_mtr pos_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
replace year = 1989 + year

* Sample sizes rounded to the nearest 5 for export.
gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F2a.xlsx", firstrow(variables) replace

}



************** FIGURE F3: ROBUSTNESS CHECK WITH INCOME CONTROLS ****************

if (`figureF3') {
	
************* FIGURE F3B: INTENSIVE MARGIN WITH INCOME CONTROLS ****************

cap log close
cap log using "$dir_log\RESTAT_Figure_F3b.log", replace

* LOGS REGRESSIONS
clear
use "$dir_data\data_sample_DD_intensive.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(xtirci)
drop if _merge == 2

gen log_xtirci = log(1 + xtirci)

* keep only those with positive contributions
keep if clkgxi > 0

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 
local tircd tircd*

gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
qui sum log_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=21
reg log_clkgxi ${main_independent} `demographics' `other' log_xtirci if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"
global version=22
reg log_clkgxi ${main_independent} `demographics' `other' `tircd' if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"
global version=23
reg log_clkgxi ${main_independent} `demographics' `other' `tircd' log_xtirci if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

drop if cg_8593==`g' & _n>=1500
}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F3b.dta", replace

* LOGS graphs:
clear
use "$dir_results\RESTAT_Figure_F3b.dta"
replace b4 = 0
drop if cg_8593_2==.
reshape long b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
replace year = 1989 + year

gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F3b.xlsx", firstrow(variables) replace


********************************************************************************
********* FIGURE F3A: EXTENSIVE MARGIN RESULTS WITH INCOME CONTROLS ************

cap log close
cap log using "$dir_log\RESTAT_Figure_F3a.log", replace

* Extensive-margin regressions with income controls: the outcome is pos_clkgxi
* and each of groups 3, 4 and 5 is compared with group 2 over 1990-1999.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

* attach xtirci; rows that exist only in the using file are dropped
merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(xtirci)
drop if _merge == 2

gen log_xtirci = log(1 + xtirci)

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

* event-time dummies _T_event_*, with T_event == -1 as the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local other i.province
local tircd tircd*

* empty result columns
* NOTE(review): presumably filled in by DiffnDiff_Save.do - confirm
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

global i=1
foreach g in `groups'   {

global cg_8593=`g'
* 1993 mean of the outcome for this group, stored in the global ave_clkgxi as in
* the other figure sections. This section used to store it in a global named
* pos_clkgxi, which left ave_clkgxi at its previous value.
* NOTE(review): assumes DiffnDiff_Save.do reads the global ave_clkgxi - confirm
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)'
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
* version 21: adds log_xtirci
global version=21
reg pos_clkgxi ${main_independent} `demographics' `other' log_xtirci if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"
* version 22: adds the tircd* variables
global version=22
reg pos_clkgxi ${main_independent} `demographics' `other' `tircd'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"
* version 23: adds both
global version=23
reg pos_clkgxi ${main_independent} `demographics' `other' `tircd' log_xtirci  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


drop if cg_8593==`g' & _n>=1500
}


* Keep the result columns only. ave_clkgxi (the stored 1993 mean) takes the place
* of the raw pos_clkgxi data variable, which was kept and exported here before.
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F3a.dta", replace

* Extensive Margin graphs:
clear
use "$dir_results\RESTAT_Figure_F3a.dta"
* _T_event_4 is not among the regressors (reference period): plot it at zero
replace b4 = 0
drop if cg_8593_2==.
reshape long b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
* period index 1-10 becomes calendar year 1990-1999
replace year = 1989 + year

gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F3a.xlsx", firstrow(variables) replace

}



***************** FIGURE F4: ROBUSTNESS CHECK WITH AGE/LC CONTROLS *************

if (`figureF4') {

cap log close
cap log using "$dir_log\RESTAT_Figure_F4.log", replace

* Both panels use one specification: event-time dummies plus demographic and
* province controls, absorbing age-by-group cells (age_group). They differ only
* in the sample and the outcome, so they are produced by a single loop.
*   Figure F4b (intensive margin): log_clkgxi, observations with clkgxi > 0
*   Figure F4a (extensive margin): pos_clkgxi, unconditional sample
* The intensive margin is estimated first, then the extensive margin.
foreach margin in intensive extensive {

    if "`margin'" == "intensive" {
        local panel  "b"
        local sample "intensive"
        local depvar "log_clkgxi"
    }
    else {
        local panel  "a"
        local sample "unconditional"
        local depvar "pos_clkgxi"
    }

    clear
    use "$dir_data/data_sample_DD_`sample'.dta"
    drop if year < 1990
    drop if year > 1999
    drop if flag_losses == 1

    * cell identifiers: age by group (absorbed below) and age by year
    egen age_group = group(age cg_8593)
    egen age_year = group(age year)

    if "`margin'" == "intensive" {
        * keep only those with positive realizations
        keep if clkgxi > 0
    }

    drop log_clkgxi
    gen log_clkgxi = log(clkgxi)

    * set as panel data
    sort lin__i year
    xtset lin__i year

    * event-time dummies _T_event_*, with T_event == -1 as the omitted category
    char T_event[omit] -1
    xi i.T_event, pref(_)

    * set controls
    set matsize 10000
    local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
    local other i.province

    * empty result columns
    * NOTE(review): presumably filled in by DiffnDiff_Save.do - confirm
    foreach v in cg_8593_2 version num_reports_2 N {
        gen `v' = .
    }
    forvalues k = 1/10 {
        gen b`k' = .
        gen se`k' = .
    }
    foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
        gen `v' = .
    }

    local groups "3 4 5"

    global i=1
    foreach g of local groups {

        * 1993 group means and group size, passed on through globals
        global cg_8593=`g'
        qui sum `depvar' if cg_8593==`g' & year==1993
        global ave_clkgxi =`r(mean)'
        qui sum mtr if cg_8593==`g' & year==1993
        global mtr=`r(mean)'
        qui sum mtr_zero if cg_8593==`g' & year==1993
        global mtr_zero=`r(mean)'
        di $mtr_zero
        qui sum cg_8593 if cg_8593==`g'
        global N_cg_8593=`r(N)'

        global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

        di  " ALTOGETHER"
        global version=24
        areg `depvar'  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i) absorb(age_group)
        do "$dir_do/DiffnDiff_Save.do"

        drop if cg_8593==`g' & _n>=1500
    }

    * keep the result columns only and save them
    keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
    order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
    save "$dir_results/RESTAT_Figure_F4`panel'.dta", replace

    * plotting data for this panel
    clear
    use "$dir_results\RESTAT_Figure_F4`panel'.dta"
    * _T_event_4 is not among the regressors (reference period): plot it at zero
    replace b4 = 0
    drop if cg_8593_2==.
    reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
    replace se = 0 if se == .
    gen lb = b - 1.96*se
    gen ub = b + 1.96*se
    * period index 1-10 becomes calendar year 1990-1999
    replace year = 1989 + year

    gen N_round = round(N,5)
    gen N_cg_8593_round = round(N_cg_8593,5)
    export excel using "$dir_results\RESTAT_Figure_F4`panel'.xlsx", firstrow(variables) replace
}

}



**************** FIGURE F5: ROBUSTNESS CHECK VARYING THE CONTROL ***************
**************** GROUP THRESHOLD                                 ***************

if (`figureF5') {

* Give Figure F5 its own log, as the other figure sections do. Without this the
* output goes to whatever log an earlier section left open, or is not logged.
cap log close
cap log using "$dir_log\RESTAT_Figure_F5.log", replace

********************************************************************************
* Alternative control-group thresholds. Filers with cg_8593 == 1 whose         *
* clkgxi8593 satisfies the panel condition are recoded to group 2.             *
*   PANEL A: $100,000 - $143,333 (1993 dollars)                                *
*            ($150,000 - $215,000 in 2016 dollars)                             *
*   PANEL B: $100,000 - $153,333 (1993 dollars)                                *
*            ($150,000 - $230,000 in 2016 dollars)                             *
*   PANEL C: $100,000+ (1993 dollars), i.e. unlimited                          *
*            ($150,000 - unlimited in 2016 dollars)                            *
********************************************************************************
local control_a "clkgxi8593 >= 150000 & clkgxi8593 <= 215000"
local control_b "clkgxi8593 >= 150000 & clkgxi8593 <= 230000"
local control_c "clkgxi8593 >= 150000"

* Each panel is estimated for three margins, in this order:
*   unconditional: ihs_clkgxi on the unconditional sample
*   intensive:     log_clkgxi on the intensive sample, clkgxi > 0 only
*   extensive:     pos_clkgxi on the unconditional sample
* Output files are RESTAT_Figure_F5<panel>_<margin>.dta and .xlsx
foreach panel in a b c {
foreach margin in unconditional intensive extensive {

    if "`margin'" == "unconditional" {
        local sample "unconditional"
        local depvar "ihs_clkgxi"
    }
    else if "`margin'" == "intensive" {
        local sample "intensive"
        local depvar "log_clkgxi"
    }
    else {
        local sample "unconditional"
        local depvar "pos_clkgxi"
    }

    clear
    use "$dir_data\data_sample_DD_`sample'.dta"

    * widen the control group (2) according to the panel condition
    gen cg_8593_alt = cg_8593
    replace cg_8593_alt = 2 if cg_8593 == 1 & (`control_`panel'')
    drop cg_8593
    rename cg_8593_alt cg_8593

    drop if year > 1999
    drop if year < 1990
    drop if flag_losses == 1

    if "`margin'" == "intensive" {
        * keep only those with positive realizations
        keep if clkgxi > 0
        drop log_clkgxi
        gen log_clkgxi = log(clkgxi)
    }

    * set as panel data
    sort lin__i year
    xtset lin__i year

    * event-time dummies _T_event_*, with T_event == -1 as the omitted category
    char T_event[omit] -1
    xi i.T_event, pref(_)

    * set controls
    set matsize 10000
    local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
    local other i.province

    * empty result columns
    * NOTE(review): presumably filled in by DiffnDiff_Save.do - confirm
    gen cg_8593_2=.
    gen version=.
    gen num_reports_2=.
    gen N=.
    forvalues y=1(1)10 {
        gen b`y'=.
        gen se`y'=.
    }
    gen N_cg_8593=.
    gen ave_mtr_zero=.
    gen ave_mtr=.
    gen ave_clkgxi=.

    local groups "3 4 5"

    global i=1
    foreach g in `groups' {

        * 1993 group means and group size, passed on through globals
        global cg_8593=`g'
        qui sum `depvar' if cg_8593==`g' & year==1993
        global ave_clkgxi =`r(mean)'
        qui sum mtr if cg_8593==`g' & year==1993
        global mtr=`r(mean)'
        qui sum mtr_zero if cg_8593==`g' & year==1993
        global mtr_zero=`r(mean)'
        di $mtr_zero
        qui sum cg_8593 if cg_8593==`g'
        global N_cg_8593=`r(N)'

        global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

        di  " ALTOGETHER"
        global version=2
        reg `depvar' ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2), vce(cluster lin__i)
        do "$dir_do/DiffnDiff_Save.do"

        drop if cg_8593==`g' & _n>=1500
    }

    * keep the result columns only and save them
    keep N N_cg_8593 cg_8593_2 num_reports_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
    order N N_cg_8593 cg_8593_2 num_reports_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
    save "$dir_results\RESTAT_Figure_F5`panel'_`margin'.dta", replace

    * plotting data for this panel and margin
    clear
    use "$dir_results\RESTAT_Figure_F5`panel'_`margin'.dta"
    * _T_event_4 is not among the regressors (reference period): plot it at zero
    replace b4 = 0
    drop if cg_8593_2==.
    reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
    replace se = 0 if se == .
    gen lb=b-1.96*se
    gen ub=b+1.96*se
    * period index 1-10 becomes calendar year 1990-1999
    replace year = 1989 + year

    gen N_round = round(N,5)
    gen N_cg_8593_round = round(N_cg_8593,5)
    export excel using "$dir_results\RESTAT_Figure_F5`panel'_`margin'.xlsx", firstrow(variables) replace
}
}

}


*************** FIGURE F6: ROBUSTNESS CHECK EXCLUDING TAX FILERS ***************
*************** WHOSE 1994 REALIZATIONS PUT THEM ABOVE $100K     ***************

if (`figureF6') {
	
cap log close
cap log using "$dir_log\RESTAT_Figure_F6.log", replace

******************************************
* PART C: UNCONDITIONAL IHS REGRESSIONS  *
******************************************

use "$dir_data\data_sample_DD_unconditional.dta", clear
drop if cg_8593 == 5 & group_5h == 0
keep if inrange(year, 1990, 1999)
drop if flag_losses == 1

* clkgxi8594 adds cg94 to clkgxi8593. Filers in groups 3 and above whose total
* reaches 150000 are flagged and left out of the regressions below.
* NOTE(review): Stata orders missing above every number, so a missing clkgxi8594
* (for instance from a missing cg94) also sets the flag - confirm this is intended.
gen clkgxi8594 = clkgxi8593 + cg94
gen flag_group_switch = (clkgxi8594  >= 150000 & cg_8593 >= 3)

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* controls
set matsize 10000
local demog_controls age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local prov_controls i.province

* declare the panel structure
sort lin__i year
xtset lin__i year

* event-time dummies _T_event_*, with T_event == -1 as the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* empty result columns
* NOTE(review): presumably filled in by DiffnDiff_Save.do - confirm
foreach v in cg_8593_2 version num_reports_2 N {
    gen `v' = .
}
forvalues k = 1/10 {
    gen b`k' = .
    gen se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    gen `v' = .
}

local treated "3 4 5"

global i=1
foreach grp of local treated {

    * 1993 group means and group size, passed on through globals
    global cg_8593=`grp'
    quietly summarize ihs_clkgxi if cg_8593==`grp' & year==1993
    global ave_clkgxi =`r(mean)'
    quietly summarize mtr if cg_8593==`grp' & year==1993
    global mtr=`r(mean)'
    quietly summarize mtr_zero if cg_8593==`grp' & year==1993
    global mtr_zero=`r(mean)'
    di $mtr_zero
    quietly summarize cg_8593 if cg_8593==`grp'
    global N_cg_8593=`r(N)'

    global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

    di  " ALTOGETHER"
    global version=2
    reg ihs_clkgxi  ${main_independent} `demog_controls' `prov_controls'  if (cg_8593==`grp' | cg_8593==2) & flag_group_switch == 0, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    drop if cg_8593==`grp' & _n>=1500
}

* keep the result columns only and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F6c.dta", replace

* IHS unconditional graphs:
use "$dir_results\RESTAT_Figure_F6c.dta", clear
keep if cg_8593_2 != .
* _T_event_4 is not among the regressors (reference period): plot it at zero
replace b4 = 0
reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* period index 1-10 becomes calendar year 1990-1999
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se

gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F6c.xlsx", firstrow(variables) replace


*********************************************************************
* PART B: CONDITIONAL ON POSITIVE/INTENSIVE MARGIN LOGS REGRESSIONS *
*********************************************************************

use "$dir_data\data_sample_DD_intensive.dta", clear
drop if cg_8593 == 5 & group_5h == 0
keep if inrange(year, 1990, 1999)
drop if flag_losses == 1

* clkgxi8594 adds cg94 to clkgxi8593. Filers in groups 3 and above whose total
* reaches 150000 are flagged and left out of the regressions below.
* NOTE(review): Stata orders missing above every number, so a missing clkgxi8594
* (for instance from a missing cg94) also sets the flag - confirm this is intended.
gen clkgxi8594 = clkgxi8593 + cg94
gen flag_group_switch = (clkgxi8594  >= 150000 & cg_8593 >= 3)

* keep only those with positive realizations
keep if clkgxi > 0

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* controls
set matsize 10000
local demog_controls age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local prov_controls i.province

* declare the panel structure
sort lin__i year
xtset lin__i year

* event-time dummies _T_event_*, with T_event == -1 as the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* empty result columns
* NOTE(review): presumably filled in by DiffnDiff_Save.do - confirm
foreach v in cg_8593_2 version num_reports_2 N {
    gen `v' = .
}
forvalues k = 1/10 {
    gen b`k' = .
    gen se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    gen `v' = .
}

local treated "3 4 5"

global i=1
foreach grp of local treated {

    * 1993 group means and group size, passed on through globals
    global cg_8593=`grp'
    quietly summarize log_clkgxi if cg_8593==`grp' & year==1993
    global ave_clkgxi =`r(mean)'
    quietly summarize mtr if cg_8593==`grp' & year==1993
    global mtr=`r(mean)'
    quietly summarize mtr_zero if cg_8593==`grp' & year==1993
    global mtr_zero=`r(mean)'
    di $mtr_zero
    quietly summarize cg_8593 if cg_8593==`grp'
    global N_cg_8593=`r(N)'

    global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

    di  " ALTOGETHER"
    global version=2
    reg log_clkgxi  ${main_independent} `demog_controls' `prov_controls'  if (cg_8593==`grp' | cg_8593==2) & flag_group_switch == 0, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    drop if cg_8593==`grp' & _n>=1500
}

* keep the result columns only and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F6b.dta", replace

* LOGS intensive graphs:
use "$dir_results\RESTAT_Figure_F6b.dta", clear
keep if cg_8593_2 != .
* _T_event_4 is not among the regressors (reference period): plot it at zero
replace b4 = 0
reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* period index 1-10 becomes calendar year 1990-1999
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se

gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F6b.xlsx", firstrow(variables) replace

******************************************
* PART A: EXTENSIVE MARGIN REGRESSIONS   *
******************************************

* Figure F6a: extensive-margin event study on the 1990-1999 window.
* Regresses pos_clkgxi on the event-time dummies for groups 3, 4 and 5 in turn
* (each pooled with group 2), excluding rows with flag_group_switch == 1,
* then reshapes the stored estimates and exports them to Excel.
use "$dir_data\data_sample_DD_unconditional.dta", clear

* Sample restrictions
keep if !(cg_8593 == 5 & group_5h == 0)
keep if inrange(year, 1990, 1999)
drop if flag_losses == 1

* flag_group_switch = 1 for groups cg_8593 >= 3 with clkgxi8593 + cg94 >= 150000
generate clkgxi8594 = clkgxi8593 + cg94
generate flag_group_switch = (cg_8593 >= 3 & clkgxi8594 >= 150000)

* Rebuild the log variable (not used as an outcome in this section)
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
set matsize 10000
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize pos_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    global version = 2
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2) & flag_group_switch == 0, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F6a.dta", replace

* Extensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results\RESTAT_Figure_F6a.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F6a.xlsx", firstrow(variables) replace

	
	
}



****************** FIGURE F7: EXTENDING THE EVENT WINDOW ***********************

if (`figureF7') {

capture log close
capture log using "$dir_log\RESTAT_Figure_F7.log", replace

********************************************************************
* PART B: INTENSIVE LOGS REGRESSIONS - VERSION 2 WINDOW 1982-2006  *
********************************************************************
* Regresses log_clkgxi on 25 event-time dummies (event time -12 to 12 around
* 1994) for groups 3, 4 and 5 in turn, each pooled with group 2, then reshapes
* the stored estimates and exports them to Excel.
use "$dir_data\data_sample_DD_intensive.dta", clear
drop if flag_losses == 1

* Keep years 1982-2006
keep if inrange(year, 1982, 2006)

* Restrict to observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Remove the event-time variables shipped with the data (there is no dummy 10)
drop treat_cg_8593 T_event
forvalues k = 1/23 {
    if `k' != 10 {
        drop _T_event_`k'
    }
}

* Rebuild event time relative to 1994; untreated rows sit in the omitted
* category -1, and event time is held inside the range -12 to 12
generate treat_cg_8593 = (cg_8593 >= 3)
generate T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
recode T_event (.=-1)  (-1000/-12=-12)  (12/10000=12)
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
set matsize 10000
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save_long_v2.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/25 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 12, plus group and year effects
    global main_independent
    forvalues k = 1/25 {
        if `k' != 12 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save_long_v2.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F7b_v2.dta", replace

* LOGS graph data: one row per estimate and year, with 95% bounds
use "$dir_results\RESTAT_Figure_F7b_v2.dta", clear
* Coefficient 12 belongs to the omitted dummy, so it is set to zero
replace b12 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1982
replace year = year + 1981

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F7b_v2.xlsx", firstrow(variables) replace


*********************************************************************
* PART A: EXTENSIVE MARGIN REGRESSIONS - VERSION 2 WINDOW 1982-2006 *
*********************************************************************

* EXTENSIVE MARGIN REGRESSIONS, window 1982-2006.
* Regresses pos_clkgxi on 25 event-time dummies (event time -12 to 12 around
* 1994) for groups 3, 4 and 5 in turn, each pooled with group 2, then reshapes
* the stored estimates and exports them to Excel.
use "$dir_data\data_sample_DD_unconditional.dta", clear
drop if flag_losses == 1

* Keep years 1982-2006
keep if inrange(year, 1982, 2006)

* Rebuild the log variable (not used as an outcome in this section)
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Remove the event-time variables shipped with the data (there is no dummy 10)
drop treat_cg_8593 T_event
forvalues k = 1/23 {
    if `k' != 10 {
        drop _T_event_`k'
    }
}

* Rebuild event time relative to 1994; untreated rows sit in the omitted
* category -1, and event time is held inside the range -12 to 12
generate treat_cg_8593 = (cg_8593 >= 3)
generate T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
recode T_event (.=-1)  (-1000/-12=-12)  (12/10000=12)
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
set matsize 10000
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save_long_v2.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/25 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize pos_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 12, plus group and year effects
    global main_independent
    forvalues k = 1/25 {
        if `k' != 12 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    global version = 2
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save_long_v2.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F7a_v2.dta", replace

* Extensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results\RESTAT_Figure_F7a_v2.dta", clear
* Coefficient 12 belongs to the omitted dummy, so it is set to zero
replace b12 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1982
replace year = year + 1981

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F7a_v2.xlsx", firstrow(variables) replace






}





***************** FIGURE F8: HETEROGENEITY BY AGE RESULTS **********************

if (`figureF8') {

capture log close
capture log using "$dir_log\RESTAT_Figure_F8.log", replace

*********************************************************
* TOTAL RESPONSE - v2: age<50 & v3: 50<= age < 65       *
*********************************************************
* Runs the 1990-1999 event-study regression of log_clkgxi on the unconditional
* sample, separately for age < 50 (version 2) and 50 <= age < 65 (version 3).
* NOTE(review): log_clkgxi is missing wherever clkgxi <= 0, so regress drops
* those rows; the F10 total-response section uses ihs_clkgxi instead --
* confirm that the log outcome is intended here.
use "$dir_data/data_sample_DD_unconditional.dta", clear
keep if inrange(year, 1990, 1999)
drop if flag_losses == 1

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
set matsize 10000
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 2: younger than 50
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2) & age < 50, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: aged 50 up to (not including) 65
    global version = 3
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2) & (age >= 50 & age < 65), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2  version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results/RESTAT_Figure_F8_unconditional.dta", replace

* LOGS unconditional graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F8_unconditional.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F8_unconditional.xlsx", firstrow(variables) replace


*********************************************************
* INTENSIVE MARGIN - v2: age<50 & v3: 50<= age < 65     *
*********************************************************

*** INTENSIVE MARGIN ***
* Runs the 1990-1999 event-study regression of log_clkgxi on the intensive
* sample, separately for age < 50 (version 2) and 50 <= age < 65 (version 3).
use "$dir_data/data_sample_DD_intensive.dta", clear
keep if inrange(year, 1990, 1999)
drop if flag_losses == 1

* Restrict to observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
set matsize 10000
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 2: younger than 50
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2) & age < 50, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: aged 50 up to (not including) 65
    global version = 3
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2) & (age >= 50 & age < 65), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2  version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results/RESTAT_Figure_F8_intensive.dta", replace

* LOGS intensive graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F8_intensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F8_intensive.xlsx", firstrow(variables) replace


*********************************************************
* EXTENSIVE MARGIN - v2: age<50 & v3: 50<= age < 65     *
*********************************************************

*** EXTENSIVE MARGIN ***
* Runs the 1990-1999 event-study regression of pos_clkgxi on the unconditional
* sample, separately for age < 50 (version 2) and 50 <= age < 65 (version 3),
* then reshapes the stored estimates and exports them to Excel.
*
* FIX: the 1993 baseline mean of pos_clkgxi used to be stored only in
* global pos_clkgxi, and the raw outcome variable pos_clkgxi was carried into
* the results file in place of the ave_clkgxi summary column. Every other
* extensive-margin section in this file stores the mean in global ave_clkgxi
* and keeps the ave_clkgxi column; this section now does the same, so
* global ave_clkgxi is no longer left over from a previously run section.
set more off
set matsize 11000

clear
use "$dir_data/data_sample_DD_unconditional.dta"
drop if year < 1990
drop if year > 1999
drop if flag_losses == 1

* Rebuild the log variable (not used as an outcome in this section)
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* set as panel data
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* set controls
set matsize 10000
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

local groups "3 4 5"

global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 baseline share with pos_clkgxi for this group
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
* Previous global name, still set in case other code reads it
global pos_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* Regressors: every event dummy except number 4, plus group and year effects
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year


di  " ALTOGETHER"
* Version 2: younger than 50
global version=2
reg pos_clkgxi ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2) & age < 50, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"
* Version 3: aged 50 up to (not including) 65
global version=3
reg pos_clkgxi ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593==2) & (age >= 50 & age < 65), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

* Drop this group's rows from row 1500 onward before the next group
drop if cg_8593==`g' & _n>=1500

}


* Keep only the result columns (ave_clkgxi, not the raw outcome) and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results/RESTAT_Figure_F8_extensive.dta", replace


* EXTENSIVE MARGIN GRAPHS: AGE HETEROGENEITY
clear
use "$dir_results\RESTAT_Figure_F8_extensive.dta"
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
drop if cg_8593_2==.
reshape long   b se , i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
gen lb=b-1.96*se
gen ub=b+1.96*se
* Index 1 maps to calendar year 1990
replace year = 1989 + year

gen N_round = round(N,5)
gen N_cg_8593_round = round(N_cg_8593,5)
export excel using "$dir_results\RESTAT_Figure_F8_extensive.xlsx", firstrow(variables) replace

}




***************** FIGURE F9: Heterogeneity by Marital Status *******************

if (`figureF9') {

capture log close
capture log using "$dir_log\RESTAT_Figure_F9.log", replace


*** INTENSIVE/CONDITIONAL ***
* Runs the 1990-1999 event-study regression of log_clkgxi on the intensive
* sample for everyone (version 1), married == 1 (version 2) and
* married == 0 (version 3).
use "$dir_data\data_sample_DD_intensive.dta", clear
keep if inrange(year, 1990, 1999)

* married = 1 when fcmp_i is one of the codes 1, 11, 2, 12, 5, 15
generate married = inlist(fcmp_i, 1, 11, 2, 12, 5, 15)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Restrict to observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls (i.fcmp_i is not included in this section)
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 1: full sample
    global version = 1
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp'), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 2: married == 1
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & married == 1, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: married == 0
    global version = 3
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & married == 0, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F9cd_intensive.dta", replace

* Intensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F9cd_intensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F9cd_intensive.xlsx", firstrow(variables) replace


*** EXTENSIVE MARGIN ***
* Runs the 1990-1999 event-study regression of pos_clkgxi on the unconditional
* sample for everyone (version 1), married == 1 (version 2) and
* married == 0 (version 3).
use "$dir_data\data_sample_DD_unconditional.dta", clear
keep if inrange(year, 1990, 1999)

* married = 1 when fcmp_i is one of the codes 1, 11, 2, 12, 5, 15
generate married = inlist(fcmp_i, 1, 11, 2, 12, 5, 15)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls (i.fcmp_i is not included in this section)
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize pos_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 1: full sample
    global version = 1
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp'), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 2: married == 1
    global version = 2
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & married == 1, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: married == 0
    global version = 3
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & married == 0, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F9ab_extensive.dta", replace

* Extensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F9ab_extensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F9ab_extensive.xlsx", firstrow(variables) replace

}


***************** FIGURE F10: Heterogeneity by Gender       ********************

if (`figureF10') {

capture log close
capture log using "$dir_log\RESTAT_Figure_F10.log", replace


*** INTENSIVE/CONDITIONAL ***
* Runs the 1990-1999 event-study regression of log_clkgxi on the intensive
* sample, separately for num_sxco_i == 1 (version 2) and
* num_sxco_i == 2 (version 3).
use "$dir_data\data_sample_DD_intensive.dta", clear
keep if inrange(year, 1990, 1999)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Restrict to observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls (i.fcmp_i is not included in this section)
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 2: num_sxco_i == 1
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 1, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: num_sxco_i == 2
    global version = 3
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 2, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F10b_intensive.dta", replace

* Intensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F10b_intensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F10b_intensive.xlsx", firstrow(variables) replace



*** EXTENSIVE MARGIN ***
* Runs the 1990-1999 event-study regression of pos_clkgxi on the unconditional
* sample, separately for num_sxco_i == 1 (version 2) and
* num_sxco_i == 2 (version 3).
use "$dir_data\data_sample_DD_unconditional.dta", clear
keep if inrange(year, 1990, 1999)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls (i.fcmp_i is not included in this section)
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize pos_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 2: num_sxco_i == 1
    global version = 2
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 1, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: num_sxco_i == 2
    global version = 3
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 2, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F10a_extensive.dta", replace

* Extensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F10a_extensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F10a_extensive.xlsx", firstrow(variables) replace


*** UNCONDITIONAL / TOTAL RESPONSE ***
* Runs the 1990-1999 event-study regression of ihs_clkgxi on the unconditional
* sample, separately for num_sxco_i == 1 (version 2) and
* num_sxco_i == 2 (version 3).
use "$dir_data\data_sample_DD_unconditional.dta", clear
keep if inrange(year, 1990, 1999)

* Declare the panel structure
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls (i.fcmp_i is not included in this section)
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize ihs_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    * Version 2: num_sxco_i == 1
    global version = 2
    regress ihs_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 1, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"
    * Version 3: num_sxco_i == 2
    global version = 3
    regress ihs_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, 2, `grp') & num_sxco_i == 2, vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F10c_unconditional.dta", replace

* IHS unconditional graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F10c_unconditional.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F10c_unconditional.xlsx", firstrow(variables) replace


}


************** FIGURE F11: Drop Tax Filers that Ever Report ********************
************** Business Income                              ********************

if (`figureF11') {


*** LOGS REGRESSIONS ***
* Runs the 1990-1999 event-study regression of log_clkgxi on rows with
* clkgxi > 0, after removing every lin__i whose se_income_thisyear sums to a
* positive number.
use "$dir_data/data_sample_DD_unconditional.dta", clear

* Attach business_owners.dta and keep matched ids with a zero se_income total.
* NOTE(review): this is the old merge syntax keyed on lin__i only; if both
* files hold several rows per lin__i the rows are paired in sequence --
* confirm the structure of business_owners.dta.
merge lin__i using "$dir_data\business_owners.dta"
bysort lin__i: egen float num_years_se_income = total(se_income_thisyear)
keep if _merge == 3
drop if num_years_se_income > 0

keep if inrange(year, 1990, 1999)

* Restrict to observations with clkgxi > 0
keep if clkgxi > 0

* Rebuild the log outcome from clkgxi
drop log_clkgxi
generate log_clkgxi = ln(clkgxi)

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls.
* NOTE(review): i.fcmp_i is absent here but present in the extensive-margin
* section of this figure -- confirm which specification is intended.
set matsize 10000
local demog age age2 age3  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize log_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year

    display " ALTOGETHER"
    global version = 2
    regress log_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F11b_intensive.dta", replace

* Intensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results/RESTAT_Figure_F11b_intensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F11b_intensive.xlsx", firstrow(variables) replace


*** EXTENSIVE MARGIN REGRESSIONS ***
* Runs the 1990-1999 event-study regression of pos_clkgxi after removing every
* lin__i whose se_income_thisyear sums to a positive number.
use "$dir_data/data_sample_DD_unconditional.dta", clear

* Attach business_owners.dta and keep matched ids with a zero se_income total.
* NOTE(review): this is the old merge syntax keyed on lin__i only; if both
* files hold several rows per lin__i the rows are paired in sequence --
* confirm the structure of business_owners.dta.
merge lin__i using "$dir_data\business_owners.dta"
bysort lin__i: egen float num_years_se_income = total(se_income_thisyear)
keep if _merge == 3
drop if num_years_se_income > 0

keep if inrange(year, 1990, 1999)

* Event-time dummies with prefix _ ; T_event == -1 is the omitted category
char T_event[omit] -1
xi i.T_event, pref(_)

* Controls
local demog age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi
local geo i.province

* Empty result columns (presumably filled in by DiffnDiff_Save.do -- confirm)
foreach v in cg_8593_2 version num_reports_2 N {
    generate `v' = .
}
forvalues k = 1/10 {
    generate b`k' = .
    generate se`k' = .
}
foreach v in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
    generate `v' = .
}

global i = 1
foreach grp of numlist 3/5 {

    global cg_8593 = `grp'

    * 1993 group means and group size, published as globals
    quietly summarize pos_clkgxi if cg_8593 == `grp' & year == 1993
    global ave_clkgxi = `r(mean)'
    quietly summarize mtr if cg_8593 == `grp' & year == 1993
    global mtr = `r(mean)'
    quietly summarize mtr_zero if cg_8593 == `grp' & year == 1993
    global mtr_zero = `r(mean)'
    display $mtr_zero
    quietly summarize cg_8593 if cg_8593 == `grp'
    global N_cg_8593 = `r(N)'

    * Regressors: every event dummy except number 4, plus group and year effects
    global main_independent
    forvalues k = 1/10 {
        if `k' != 4 {
            global main_independent $main_independent _T_event_`k'
        }
    }
    global main_independent $main_independent i.cg_8593 i.year
    global num_reports = 4

    display " ALTOGETHER"
    global version = 2
    regress pos_clkgxi ${main_independent} `demog' `geo' if inlist(cg_8593, `grp', 2), vce(cluster lin__i)
    do "$dir_do/DiffnDiff_Save.do"

    * Drop this group's rows from row 1500 onward before the next group
    drop if cg_8593 == `grp' & _n >= 1500
}

* Keep only the result columns and save them
keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F11a_extensive.dta", replace

* Extensive-margin graph data: one row per estimate and year, with 95% bounds
use "$dir_results\RESTAT_Figure_F11a_extensive.dta", clear
* Coefficient 4 belongs to the omitted dummy, so it is set to zero
replace b4 = 0
keep if cg_8593_2 != .
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
replace se = 0 if se == .
generate lb = b - 1.96*se
generate ub = b + 1.96*se
* Index 1 maps to calendar year 1990
replace year = year + 1989

generate N_round = round(N, 5)
generate N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F11a_extensive.xlsx", firstrow(variables) replace


}


************ FIGURE F12: Combine Unmarried Tax Filers and     ******************
************ Married Tax Filers with an Adjusted Family-Level ******************
************ LCGE Limit                                       ******************

if (`figureF12') {
	
* Set up
* Toggle for the data-construction step: with setup = 1 the temp files and the
* person-year analysis file saved at the end of this block are rebuilt.
local setup = 1
if (`setup') {

* Step 1: collapse the base sample (1985-2004) to one row per lin__i-year,
* keeping the row count n and the means of cg_8593, clkgxi8593 and cg94;
* cg94 is then replaced by its minimum within lin__i.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year > 2004
drop if year < 1985

gen temp_sample = 1
collapse (count) n = temp_sample (mean) cg_8593 clkgxi8593 cg94, by(lin__i year)
by lin__i, sort : egen float temp_cg94 = min(cg94)
drop cg94
rename temp_cg94 cg94
save "$dir_data\temp_sample.dta", replace


* Step 2: base sample merged with temp_sample and the 1982-99 capital income
* file; rows found only in the capital income file are dropped and only
* years before 2000 are kept.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year > 2004
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_8299
keep if year < 2000
tab year
save "$dir_data\temp_8299.dta", replace


* Step 3: same construction with the 2000-16 capital income file, keeping
* years from 2000 onward (the year > 2004 drop above still applies).
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year > 2004
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_0016.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000
tab year
save "$dir_data\temp_0016.dta", replace

* Stack the two periods (the 2000+ rows are still in memory)
append using "$dir_data\temp_8299.dta"
table year cg_8593, stat(m invi)


* Flag 1994 Switchers: rows in groups cg_8593 >= 3 whose 1985-93 total plus
* cg94 is at least 150000
gen clkgxi8594 = clkgxi8593 + cg94
gen flag_group_switch = (clkgxi8594  >= 150000 & cg_8593 >= 3)

* generate marital status variable (1 when fcmp_i is one of 1, 2, 5, 11, 12, 15)
gen married = (fcmp_i == 1 | fcmp_i == 11 | fcmp_i == 2 | fcmp_i == 12 | fcmp_i == 5 | fcmp_i == 15)

* generate log & IHS total income variables
gen log_tirc_i = log(tirc_i)
gen ihs_tirc_i = log(tirc_i + sqrt(1 + tirc_i^2))

* generate log & IHS ggex variables
gen log_ggex_i = log(ggex_i)
gen ihs_ggex_i = log(ggex_i + sqrt(1 + ggex_i^2))

* generate log & IHS of capital income variables
* NOTE(review): the merges above request `invi` while this list names
* `invi_i`; confirm that `invi_i` exists in memory (e.g. `invi` resolving to it
* by variable abbreviation, or the base sample already carrying it).
local cap_inc_vars klpyci xdiv_i invi_i rrspci
foreach v in `cap_inc_vars' {

gen log_`v' = log(`v')
ge ihs_`v' = log(`v' + sqrt(1 + `v'^2))

}

* generate income net of capital gains; the flag marks negative net income
gen total_inc_less_clkgxi = tirc_i - clkgxi
gen log_total_inc_less_clkgxi = log(total_inc_less_clkgxi)
gen ihs_total_inc_less_clkgxi = log(total_inc_less_clkgxi + sqrt(1 + total_inc_less_clkgxi^2))
gen flag_total_inc_less_clkgxi = (total_inc_less_clkgxi < 0)
tab flag_total_inc_less_clkgxi
tab cg_8593 flag_total_inc_less_clkgxi

* set as panel data
sort lin__i year
xtset lin__i year

* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)


* generate post-LCGE cancellation variables: post = 1 from 1995 onward, and a
* group id for each (cg_8593, post) cell, left missing when cg_8593 == 1
gen post = (year >=  1995)
egen float cg_8593_post = group(cg_8593 post) if cg_8593 != 1
table cg_8593 post, stat(m cg_8593_post)

* Variable lists; neither local is referenced again inside this setup block
local vars clkgxi log_clkgxi ihs_clkgxi age married fsizei tnkidi num_sxco_i province tirc_i log_tirc_i ihs_tirc_i total_inc_less_clkgxi log_total_inc_less_clkgxi ihs_total_inc_less_clkgxi ggex_i log_ggex_i ihs_ggex_i klpyci log_klpyci ihs_klpyci xdiv_i log_xdiv_i ihs_xdiv_i invi_i log_invi_i ihs_invi_i rrspci log_rrspci ihs_rrspci pos_clkgxi mtr mtr_zero mtr_max mtr_prev

local testvars clkgxi log_clkgxi ihs_clkgxi age married fsizei tnkidi num_sxco_i province tirc_i log_tirc_i ihs_tirc_i ggex_i log_ggex_i ihs_ggex_i klpyci log_klpyci ihs_klpyci xdiv_i log_xdiv_i ihs_xdiv_i invi_i log_invi_i ihs_invi_i rrspci log_rrspci ihs_rrspci pos_clkgxi mtr mtr_zero mtr_max mtr_prev



* Trim to the variables saved in the analysis file
keep lin__i year post cg_8593 clkgxi8594 clkgxi log_clkgxi ihs_clkgxi age married fsizei tnkidi num_sxco_i province tirc_i log_tirc_i ihs_tirc_i total_inc_less_clkgxi log_total_inc_less_clkgxi ihs_total_inc_less_clkgxi ggex_i log_ggex_i ihs_ggex_i klpyci log_klpyci ihs_klpyci xdiv_i log_xdiv_i ihs_xdiv_i invi_i log_invi_i ihs_invi_i rrspci log_rrspci ihs_rrspci pos_clkgxi mtr mtr_zero mtr_max mtr_prev flag_group_switch xtiici num_psco_i age2 age3 fcmp_i flag_total_inc_less_clkgxi cg_8593_post wgt2_i cpi_to2016

* The temp files are deliberately left on disk (erase commands disabled)
*erase "$dir_data\temp_8299.dta"
*erase "$dir_data\temp_0016.dta"
save "$dir_data/RESTAT_Response_R1_September_2023.dta", replace

}


* Build the spousal analysis sample: attach the spouse's capital gains
* (clkgxp) to the base sample, compute cumulative spousal gains, classify them
* into groups, reassign married filers to the control group where the spousal
* group is 2, and flag filers whose marital status changes between
* consecutive years. Reads temp_sample and the R1 analysis file from
* $dir_data; writes R1_spousal_analysis_sample.dta.

* ---- 1985-1999: spousal gains from the 8299 file ----
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year > 2004
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_spousal_8299.dta", keepusing(clkgxp)
drop if _merge == 2
rename _merge _merge_8299
keep if year < 2000

* Scale spousal gains by cpi_to2016, then build per-person cumulative totals.
* egen total() with an if-condition is missing outside the window, so max()
* spreads the window total to all of the person's rows.
replace clkgxp = clkgxp*cpi_to2016
by lin__i, sort : egen float cgp_8593 = total(clkgxp) if year >= 1985 & year <= 1993
by lin__i, sort : egen float clkgxp8593 = max(cgp_8593)
by lin__i, sort : egen float cgp_8599 = total(clkgxp) if year >= 1985 & year <= 1999
by lin__i, sort : egen float clkgxp8599 = max(cgp_8599)
sort lin__i year
drop cgp_8593 cgp_8599

tab year
save "$dir_data\temp_spousal_8299.dta", replace


* ---- 2000 onward: spousal gains from the 0016 file ----
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year > 2004
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_spousal_0016.dta", keepusing(clkgxp)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000

replace clkgxp = clkgxp*cpi_to2016
by lin__i, sort : egen float cgp_0016 = total(clkgxp)
by lin__i, sort : egen float clkgxp0016 = max(cgp_0016)
sort lin__i year
drop cgp_0016

tab year
save "$dir_data\temp_spousal_0016.dta", replace

* ---- Stack both periods and attach the individual-level analysis file ----
append using "$dir_data\temp_spousal_8299.dta"
merge 1:1 lin__i year using "$dir_data/RESTAT_Response_R1_September_2023.dta"

tab _merge

* values are double those for individuals
* Stata orders missing above every number, so the open-ended top bracket must
* exclude missing explicitly. clkgxp8593 is only defined on the pre-2000 rows;
* without the guard every row with a missing total would be coded as group 1.
gen cgp_8593 = 1 if clkgxp8593 >= 400000 & !missing(clkgxp8593)
replace cgp_8593 = 2 if (clkgxp8593 >= 300000 & clkgxp8593 < 400000)
replace cgp_8593 = 3 if (clkgxp8593 >= 200000 & clkgxp8593 < 300000)
replace cgp_8593 = 4 if (clkgxp8593 >= 100000 & clkgxp8593 < 200000)
replace cgp_8593 = 5 if (clkgxp8593 >= 50000 & clkgxp8593 < 100000)

tab cg_8593 cgp_8593 if married == 0
tab cg_8593 cgp_8593 if married == 1
* pause only stops execution when "pause on" has been set
pause

* Married filers in groups 3+ whose spousal group is 2 are moved to group 2
gen alt_cg_8593 = cg_8593
replace alt_cg_8593 = 2 if cgp_8593 == 2 & cg_8593 >= 3 & married == 1
tab cg_8593 alt_cg_8593

* Flag filers whose marital status differs from the previous calendar year.
* The [_n-1] comparison is only meaningful when rows are ordered by person and
* year, so sort explicitly instead of relying on the order left by append/merge.
sort lin__i year
gen temp_married_status_change = 0
replace temp_married_status_change = 1 if (married != married[_n-1]) & (lin__i == lin__i[_n-1]) & (year == year[_n-1] + 1)
by lin__i, sort : egen float married_status_change = max(temp_married_status_change)

save "$dir_data\R1_spousal_analysis_sample.dta", replace
	
* Figure F12b: event-study regressions of log capital gains, run separately
* for groups 3, 4 and 5 against group 2, using the spousal-adjusted grouping
* (alt_cg_8593). Results are saved to RESTAT_Figure_F12b.dta.
clear
use "$dir_data\R1_spousal_analysis_sample.dta"
drop if year < 1990
drop if year > 1999

* Rebuild the log outcome (missing when clkgxi <= 0)
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* Rebuild treatment and event time from the adjusted grouping: event time is
* year - 1994 for alt_cg_8593 >= 3 and -1 for everyone else
drop treat_cg_8593 T_event
gen treat_cg_8593 = (alt_cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
* Missing -> -1; values at or below -10 are binned to -10, 12 and above to 12
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* From here on cg_8593 holds the spousal-adjusted grouping
drop cg_8593
rename alt_cg_8593 cg_8593
* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum log_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g; this count is not restricted to clkgxi > 0, unlike
* the regression sample below
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Group g versus group 2, positive gains only, SEs clustered on lin__i
reg log_clkgxi   ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2) & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593==`g' & _n>=1500

}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F12b.dta", replace

* Figure F12b plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F12b.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F12b.xlsx", firstrow(variables) replace


*** EXTENSIVE MARGIN REGRESSIONS ***
* Figure F12a: event-study regressions of pos_clkgxi, run separately for
* groups 3, 4 and 5 against group 2, using the spousal-adjusted grouping
* (alt_cg_8593). Results are saved to RESTAT_Figure_F12a.dta.
clear
use "$dir_data\R1_spousal_analysis_sample.dta"

drop if year < 1990
drop if year > 1999

* log_clkgxi is rebuilt here although the regression below uses pos_clkgxi
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* Rebuild treatment and event time from the adjusted grouping: event time is
* year - 1994 for alt_cg_8593 >= 3 and -1 for everyone else
drop treat_cg_8593 T_event
gen treat_cg_8593 = (alt_cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
* Missing -> -1; values at or below -10 are binned to -10, 12 and above to 12
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* From here on cg_8593 holds the spousal-adjusted grouping
drop cg_8593
rename alt_cg_8593 cg_8593
* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year
* Not referenced elsewhere in this block; presumably read by the helper
global num_reports=4

di  " ALTOGETHER"
global version=2
* Group g versus group 2, SEs clustered on lin__i
reg pos_clkgxi   ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593==`g' & _n>=1500

}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F12a.dta", replace


* Figure F12a plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F12a.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F12a.xlsx", firstrow(variables) replace


}



************ FIGURE F13: Drop Tax Filers Whose Marital Status Changes **********

if (`figureF13') {


*** LOGS REGRESSIONS ***
* Figure F13b: event-study regressions of log capital gains for groups 3, 4
* and 5 against group 2, after dropping filers flagged with a marital status
* change. Results are saved to RESTAT_Figure_F13b.dta.
clear
use "$dir_data\R1_spousal_analysis_sample.dta"

drop if married_status_change == 1
drop if year < 1990
drop if year > 1999

* keep only those tax filers with positive realizations
keep if clkgxi > 0

drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* Event-time dummies with prefix "_"; the category T_event == -1 is omitted.
* T_event and cg_8593 are used as stored in the file (not rebuilt here).
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum log_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g (sample already restricted to clkgxi > 0 above)
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year


di  " ALTOGETHER"
global version=2
* Group g versus group 2, SEs clustered on lin__i
reg log_clkgxi   ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2) & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593==`g' & _n>=1500

}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F13b.dta", replace

* Figure F13b plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F13b.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F13b.xlsx", firstrow(variables) replace


*** EXTENSIVE MARGIN REGRESSIONS ***
* Figure F13a: event-study regressions of pos_clkgxi for groups 3, 4 and 5
* against group 2, after dropping filers flagged with a marital status
* change. Results are saved to RESTAT_Figure_F13a.dta.
clear
use "$dir_data\R1_spousal_analysis_sample.dta"

drop if married_status_change == 1
drop if year < 1990
drop if year > 1999

* Event-time dummies with prefix "_"; the category T_event == -1 is omitted.
* T_event and cg_8593 are used as stored in the file (not rebuilt here).
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.

* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups'   {
   
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Group g versus group 2, SEs clustered on lin__i
reg pos_clkgxi   ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593==`g' & _n>=1500

}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F13a.dta", replace



* Figure F13a plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F13a.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F13a.xlsx", firstrow(variables) replace



}


****** FIGURE F14: Drop Tax Filers Who Ever Exceed the $100K LCGE Limit ********

if (`figureF14') {

* LOGS Intensive Margin Regressions
* Figure F14b: event-study regressions of log capital gains for groups 3, 4
* and 5 against group 2, after dropping group-3+ filers whose total clkgxi
* over all retained years is at least 150000.
* Requires temp_sample.dta and temp_8299.dta to exist in $dir_data already
* (they are not created in this block); temp_0016.dta is overwritten here.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_0016.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000
tab year
save "$dir_data\temp_0016.dta", replace

* Stack the pre-2000 rows under the 2000+ rows still in memory
append using "$dir_data\temp_8299.dta"
drop if year < 1985
* Per-person total of clkgxi over every remaining year
by lin__i, sort : egen float cg_8516 = total(clkgxi)
* ever_over_LCGE is only used in the two tabs below
gen ever_over_LCGE = 0
replace ever_over_LCGE = 1 if cg_8593 <= 2
replace ever_over_LCGE = 1 if cg_8593 >= 3 & cg_8516 >= 150000
drop if cg_8593 == 1

* Tabulate before and after removing the group-3+ filers at or above 150000
tab cg_8593 ever_over_LCGE
drop if cg_8593 >=3 & cg_8516 >= 150000
tab cg_8593 ever_over_LCGE

drop if year < 1990
drop if year > 1999

* Rebuild the log outcome (missing when clkgxi <= 0)
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* Event-time dummies with prefix "_"; the category T_event == -1 is omitted.
* T_event is used as stored in the data (not rebuilt in this block).
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups' {
	
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum log_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g; this count is not restricted to clkgxi > 0, unlike
* the regression sample below
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Group g versus group 2, positive gains only, SEs clustered on lin__i
reg log_clkgxi   ${main_independent} `demographics' `other' if (cg_8593 == `g' | cg_8593 == 2) & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593 == `g' & _n>=1500

}


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F14b.dta", replace

* Figure F14b plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F14b.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F14b.xlsx", firstrow(variables) replace



***** POOLED TREATMENT GROUPS: LOGS Intensive Margin Regressions *****
* Figure F14b (pooled): a single event-study regression of log capital gains
* with all groups cg_8593 >= 3 pooled against group 2, after dropping
* group-3+ filers whose total clkgxi over all retained years is at least 150000.
* Requires temp_sample.dta and temp_8299.dta to exist in $dir_data already
* (they are not created in this block); temp_0016.dta is overwritten here.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_0016.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000
tab year
save "$dir_data\temp_0016.dta", replace

* Stack the pre-2000 rows under the 2000+ rows still in memory
append using "$dir_data\temp_8299.dta"
drop if year < 1985
* Per-person total of clkgxi over every remaining year
by lin__i, sort : egen float cg_8516 = total(clkgxi)
* ever_over_LCGE is only used in the two tabs below
gen ever_over_LCGE = 0
replace ever_over_LCGE = 1 if cg_8593 <= 2
replace ever_over_LCGE = 1 if cg_8593 >= 3 & cg_8516 >= 150000
drop if cg_8593 == 1

* Tabulate before and after removing the group-3+ filers at or above 150000
tab cg_8593 ever_over_LCGE
drop if cg_8593 >=3 & cg_8516 >= 150000
tab cg_8593 ever_over_LCGE

drop if year < 1990
drop if year > 1999

* Positive realizations only, then rebuild the log outcome
keep if clkgxi > 0
drop log_clkgxi
gen log_clkgxi = log(clkgxi)

* Rebuild treatment and event time: event time is year - 1994 for
* cg_8593 >= 3 and -1 for everyone else
drop treat_cg_8593 T_event
gen treat_cg_8593 = (cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
* Missing -> -1; values at or below -10 are binned to -10, 12 and above to 12
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)

* `groups' is defined but not looped over in this pooled block
local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; the group global is fixed at
* 3 while the statistics pool every group >= 3
global i=1
global cg_8593=3
qui sum log_clkgxi if cg_8593 >= 3 & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593 >= 3 & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593 >= 3 & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593 >= 3
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Groups >= 3 versus group 2, SEs clustered on lin__i
reg log_clkgxi   ${main_independent} `demographics' `other' if (cg_8593 >= 3 | cg_8593 == 2) & clkgxi > 0, vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop treated rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593 >= 3 & _n>=1500


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F14b_pooled.dta", replace

* Figure F14b (pooled) plot data: reshape the stored coefficients to one row
* per result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F14b_pooled.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F14b_pooled.xlsx", firstrow(variables) replace


****** EXTENSIVE MARGIN REGRESSIONS ******
* Figure F14a: event-study regressions of pos_clkgxi for groups 3, 4 and 5
* against group 2, after dropping group-3+ filers whose total clkgxi over
* all retained years is at least 150000.
* Requires temp_sample.dta and temp_8299.dta to exist in $dir_data already
* (they are not created in this block); temp_0016.dta is overwritten here.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_0016.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000
tab year
save "$dir_data\temp_0016.dta", replace

* Stack the pre-2000 rows under the 2000+ rows still in memory
append using "$dir_data\temp_8299.dta"
drop if year < 1985
* Per-person total of clkgxi over every remaining year
by lin__i, sort : egen float cg_8516 = total(clkgxi)
* ever_over_LCGE is only used in the two tabs below
gen ever_over_LCGE = 0
replace ever_over_LCGE = 1 if cg_8593 <= 2
replace ever_over_LCGE = 1 if cg_8593 >= 3 & cg_8516 >= 150000
drop if cg_8593 == 1

* Tabulate before and after removing the group-3+ filers at or above 150000
tab cg_8593 ever_over_LCGE
drop if cg_8593 >=3 & cg_8516 >= 150000
tab cg_8593 ever_over_LCGE

drop if year < 1990
drop if year > 1999

* Rebuild treatment and event time: event time is year - 1994 for
* cg_8593 >= 3 and -1 for everyone else
drop treat_cg_8593 T_event
gen treat_cg_8593 = (cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
* Missing -> -1; values at or below -10 are binned to -10, 12 and above to 12
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)

local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; this block never reads them
* itself apart from the di of $mtr_zero
global i=1
foreach g in `groups' {
	
global cg_8593=`g'
* 1993 means of the outcome and the two mtr variables for group g
qui sum pos_clkgxi if cg_8593==`g' & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593==`g' & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593==`g' & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
* Row count of group g
qui sum cg_8593 if cg_8593==`g'
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Group g versus group 2, SEs clustered on lin__i
reg pos_clkgxi   ${main_independent} `demographics' `other' if (cg_8593 == `g' | cg_8593 == 2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"

* Drop group-g rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593 == `g' & _n>=1500

}

keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F14a.dta", replace

* Figure F14a plot data: reshape the stored coefficients to one row per
* result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F14a.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F14a.xlsx", firstrow(variables) replace


*** POOLED TREATMENT GROUPS: EXTENSIVE MARGIN REGRESSIONS ***
* Figure F14a (pooled): a single event-study regression of pos_clkgxi with
* all groups cg_8593 >= 3 pooled against group 2, after dropping group-3+
* filers whose total clkgxi over all retained years is at least 150000.
* Requires temp_sample.dta and temp_8299.dta to exist in $dir_data already
* (they are not created in this block); temp_0016.dta is overwritten here.
clear
use "$dir_data\data_sample_DD_unconditional.dta"
drop if year < 1985
merge 1:1 lin__i year using "$dir_data\temp_sample"
rename _merge _merge_temp_sample
merge 1:1 lin__i year using "$dir_data\capital_income_final_0016.dta", keepusing(xtiici klpyci xdiv_i rrspci invi)
drop if _merge == 2
rename _merge _merge_0016
keep if year >= 2000
tab year
save "$dir_data\temp_0016.dta", replace

* Stack the pre-2000 rows under the 2000+ rows still in memory
append using "$dir_data\temp_8299.dta"
drop if year < 1985
* Per-person total of clkgxi over every remaining year
by lin__i, sort : egen float cg_8516 = total(clkgxi)
* ever_over_LCGE is only used in the two tabs below
gen ever_over_LCGE = 0
replace ever_over_LCGE = 1 if cg_8593 <= 2
replace ever_over_LCGE = 1 if cg_8593 >= 3 & cg_8516 >= 150000
drop if cg_8593 == 1

* Tabulate before and after removing the group-3+ filers at or above 150000
tab cg_8593 ever_over_LCGE
drop if cg_8593 >=3 & cg_8516 >= 150000
tab cg_8593 ever_over_LCGE

drop if year < 1990
drop if year > 1999

* Rebuild treatment and event time: event time is year - 1994 for
* cg_8593 >= 3 and -1 for everyone else
drop treat_cg_8593 T_event
gen treat_cg_8593 = (cg_8593 >= 3)
gen T_event = treat_cg_8593*(year - 1994)
replace T_event = -1 if treat_cg_8593 == 0
* Missing -> -1; values at or below -10 are binned to -10, 12 and above to 12
recode T_event (.=-1)  (-1000/-10=-10)  (12/10000=12)
*recode T_event (.=-1)
* Event-time dummies with prefix "_"; the category T_event == -1 is omitted
char T_event[omit] -1
xi i.T_event, pref(_)

* `groups' is defined but not looped over in this pooled block
local groups "3 4 5"
local demographics age age2 age3  i.fcmp_i  i.num_sxco_i i.tnkidi  
local other i.province 

* Empty result columns; presumably filled by DiffnDiff_Save.do (helper not
* visible here -- confirm)
gen cg_8593_2=.
gen version=.
gen num_reports_2=.
gen N=.
forvalues y=1(1)10{
gen b`y'=.
gen se`y'=.
}
gen N_cg_8593=.
gen ave_mtr_zero=.
gen ave_mtr=.
gen ave_clkgxi=.


* Globals below are set for the helper do-file; the group global is fixed at
* 3 while the statistics pool every group >= 3
global i=1
global cg_8593=3
qui sum pos_clkgxi if cg_8593 >= 3 & year==1993
global ave_clkgxi =`r(mean)' 
qui sum mtr if cg_8593 >= 3 & year==1993
global mtr=`r(mean)'
qui sum mtr_zero if cg_8593 >= 3 & year==1993
global mtr_zero=`r(mean)'
di $mtr_zero
qui sum cg_8593 if cg_8593 >= 3
global N_cg_8593=`r(N)'

* _T_event_4 is left out of the regressor list
global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

di  " ALTOGETHER"
global version=2
* Groups >= 3 versus group 2, SEs clustered on lin__i
reg pos_clkgxi   ${main_independent} `demographics' `other' if (cg_8593 >= 3 | cg_8593 == 2), vce(cluster lin__i)
do "$dir_do/DiffnDiff_Save.do"


* Drop treated rows located at observation 1500 or later; earlier rows are
* kept (presumably because they hold stored results -- confirm in the helper)
drop if cg_8593 >= 3 & _n>=1500


keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr ave_clkgxi
order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr ave_clkgxi version b* se*
save "$dir_results\RESTAT_Figure_F14a_pooled.dta", replace

* Figure F14a (pooled) plot data: reshape the stored coefficients to one row
* per result row and year, add 95% bounds and rounded counts, export to Excel.
use "$dir_results\RESTAT_Figure_F14a_pooled.dta", clear
keep if cg_8593_2 != .
* b4 is the coefficient slot fixed at zero
replace b4 = 0
reshape long b se, i(version N ave_mtr_zero ave_mtr ave_clkgxi cg_8593_2) j(year)
* j runs 1..10; shift it onto calendar years
replace year = year + 1989
replace se = 0 if se == .
gen lb = b - 1.96*se
gen ub = b + 1.96*se
* Counts rounded to the nearest 5
gen N_round = round(N, 5)
gen N_cg_8593_round = round(N_cg_8593, 5)
export excel using "$dir_results\RESTAT_Figure_F14a_pooled.xlsx", firstrow(variables) replace

}


***************** FIGURE G1: Demographic Characteristics ***********************

if (`figureG1') {

	* Route output to this figure's own log
	capture log close
	log using "$dir_log\RESTAT_Figure_G1.log", replace

	* Figure G1 is a column subset of the Figure 5 results file
	use "$dir_results\RESTAT_Figure_5.dta", clear

	* Keep the group/year keys plus the age, married, tnkidi, tirc_i and
	* txi__i series with their standard errors, weighted versions and
	* dominance-test columns
	keep cg_8593 year ///
		age se_age age_wgt se_age_wgt ///
		married se_married married_wgt se_married_wgt ///
		tnkidi se_tnkidi tnkidi_wgt se_tnkidi_wgt ///
		tirc_i se_tirc_i tirc_i_wgt se_tirc_i_wgt ///
		rtirc_i max1_tirc_i sum_tirc_i domtest_tirc_i rvalue_tirc_i ///
		txi__i se_txi__i txi__i_wgt se_txi__i_wgt ///
		rtxi__i max1_txi__i sum_txi__i domtest_txi__i rvalue_txi__i

	* Write the subset both as a Stata file and as a spreadsheet
	save "$dir_results\RESTAT_Figure_G1.dta", replace
	export excel using "$dir_results\RESTAT_Figure_G1.xlsx", firstrow(variables) replace

}



***************** FIGURE G1 - Referee 2 Request (Round 2 of R&R): RAW MEANS (EXTENSIVE & INTENSIVE) ******************

* Figure G1 (Referee 2 request): group-by-year summary statistics of total
* income net of capital gains (level, positive dummy, conditional on
* clkgxi > 0, log and IHS), with weighted versions, dominance-test columns
* and rounded values. Output: RESTAT_Figure_G1_Referee2 (.dta and .xlsx).
if (`figureG1_Referee2') {

cap log close
log using "$dir_log\RESTAT_Figure_G1_Referee2.log", replace


clear
use "$dir_data/data_sample_DD_unconditional.dta"

* Old-style merge syntax (no 1:1 / m:1 prefix).
* NOTE(review): this form expects both files to be sorted by lin__i year --
* confirm, or consider the modern merge syntax.
merge lin__i year using "$dir_data/province_temp.dta"
* Fill a missing province from province2 for years 2000 onward
replace province = province2 if province == . & year >= 2000
drop province2
keep if _merge == 3
drop _merge

* Drop rows with a missing clkgxi8593
gen flag_missing = (clkgxi8593 == .)
drop if flag_missing == 1
drop flag_missing



* Total income net of capital gains and its transformations
gen tot_income_less_cg = tirc_i - clkgxi
gen dummy_tot_income_less_cg = (tot_income_less_cg > 0)
gen log_tot_income_less_cg = log(tot_income_less_cg)
gen ihs_tot_income_less_cg = log(tot_income_less_cg + sqrt(1 + tot_income_less_cg^2))


keep year cg_8593 wgt2_i clkgxi tot_income_less_cg dummy_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg cpi_to2016 

* (1) number of individ with positive gains
gen total=1
ge clkgxi_dummy = (clkgxi > 0)

* (2) summary stats for those with positive gains
gen pos_tot_income_less_cg=tot_income_less_cg if clkgxi>0 


* (3) Generate weighted/perturb versions of the variables of interest
foreach var in tot_income_less_cg dummy_tot_income_less_cg pos_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg  {
	gen `var'_wgt = `var'*wgt2_i
}


*  DOMINANCE TEST
* For each variable: max1 is the largest value within cg_8593-year; max2 and
* max3 are the next two distinct values; sum_`var' holds the value on rows
* that are none of the three.
* NOTE(review): the sum_`var' variables created here are not passed to the
* collapse below, which builds sum_`var' as the (sum) of the full variable --
* confirm which denominator is intended.
foreach var in tot_income_less_cg pos_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg  {
	bysort cg_8593 year: egen max1_`var'=max(`var')
	gen temp=`var' if `var'!=max1_`var'
	bysort cg_8593 year: egen max2_`var'=max(temp)
	replace temp=.
	replace temp=`var' if `var'!=max1_`var' & `var'!=max2_`var'
	bysort cg_8593 year: egen max3_`var'=max(temp)
	gen sum_`var'=`var' if `var'!=max1_`var' & `var'!=max2_`var' & `var'!=max3_`var'
	drop temp max2_`var' max3_`var'
}

gen clkgxi_count = clkgxi_dummy


* Collapse to one row per cg_8593-year: count of positive-gain rows, means
* and standard errors of the mean (unweighted and _wgt versions), the max1
* values, and sums of the full variables
collapse (sum) clkgxi_count (mean) tot_income_less_cg dummy_tot_income_less_cg pos_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg (semean) se_tot_income_less_cg=tot_income_less_cg se_dummy_tot_income_less_cg=dummy_tot_income_less_cg se_pos_tot_income_less_cg=pos_tot_income_less_cg se_log_tot_income_less_cg=log_tot_income_less_cg se_ihs_tot_income_less_cg=ihs_tot_income_less_cg (mean) tot_income_less_cg_wgt dummy_tot_income_less_cg_wgt pos_tot_income_less_cg_wgt log_tot_income_less_cg_wgt ihs_tot_income_less_cg_wgt (semean) se_tot_income_less_cg_wgt=tot_income_less_cg_wgt se_dummy_tot_income_less_cg_wgt=dummy_tot_income_less_cg_wgt se_pos_tot_income_less_cg_wgt=pos_tot_income_less_cg_wgt se_log_tot_income_less_cg_wgt=log_tot_income_less_cg_wgt se_ihs_tot_income_less_cg_wgt=ihs_tot_income_less_cg_wgt (mean) max1_tot_income_less_cg max1_pos_tot_income_less_cg max1_log_tot_income_less_cg max1_ihs_tot_income_less_cg (sum) sum_tot_income_less_cg=tot_income_less_cg sum_pos_tot_income_less_cg=pos_tot_income_less_cg sum_log_tot_income_less_cg=log_tot_income_less_cg sum_ihs_tot_income_less_cg=ihs_tot_income_less_cg, by(cg_8593 year)

* PASS DOMINANCE TEST
* domtest = 1 when the largest value exceeds 80% of the cell sum (a missing
* ratio also evaluates to 1); rvalue stores the ratio itself
foreach var in tot_income_less_cg pos_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg  {
gen domtest_`var'=(max1_`var'/sum_`var'>0.8)
gen rvalue_`var'=max1_`var'/sum_`var'
}



* ROUND AMOUNTS: TO THE NEAREST 10 WHEN <= 1000, TO THE NEAREST 100 WHEN > 1000
* (the same rule is applied to the log and IHS means)
foreach var in tot_income_less_cg pos_tot_income_less_cg log_tot_income_less_cg ihs_tot_income_less_cg {
	gen r`var'=.
	replace r`var'=round(`var',10) if `var'<=1000
	replace r`var'=round(`var',100) if `var'>1000

}


save "$dir_results\RESTAT_Figure_G1_Referee2.dta", replace

export excel using "$dir_results\RESTAT_Figure_G1_Referee2.xlsx", firstrow(variables) replace
 
}



***************** FIGURE G2: Demographic Characteristics ***********************
***************** SAMPLE RESTRICTED TO FILERS WITH CLKGXI > 0 ******************

if (`figureG2') {

	* Route output to this figure's own log
	capture log close
	log using "$dir_log\RESTAT_Figure_G2.log", replace

	* Figure G2 is a column subset of the Figure 5 results file
	use "$dir_results\RESTAT_Figure_5.dta", clear

	* Keep the group/year keys plus the pos_ versions of the age, married,
	* tnkidi, tirc_i and txi__i series with their standard errors, weighted
	* versions and dominance-test columns
	keep cg_8593 year ///
		pos_age se_pos_age pos_age_wgt se_pos_age_wgt ///
		pos_married se_pos_married pos_married_wgt se_pos_married_wgt ///
		pos_tnkidi se_pos_tnkidi pos_tnkidi_wgt se_pos_tnkidi_wgt ///
		pos_tirc_i se_pos_tirc_i pos_tirc_i_wgt se_pos_tirc_i_wgt ///
		rpos_tirc_i max1_pos_tirc_i sum_pos_tirc_i domtest_pos_tirc_i rvalue_pos_tirc_i ///
		pos_txi__i se_pos_txi__i pos_txi__i_wgt se_pos_txi__i_wgt ///
		rpos_txi__i max1_pos_txi__i sum_pos_txi__i domtest_pos_txi__i rvalue_pos_txi__i

	* Write the subset both as a Stata file and as a spreadsheet
	save "$dir_results\RESTAT_Figure_G2.dta", replace
	export excel using "$dir_results\RESTAT_Figure_G2.xlsx", firstrow(variables) replace

}


********************** FIGURE G3: Income Growth Rates **************************
********************** Same data as Figures G1 & G2   **************************


********************** Figure H1: Time Series for Dividends,   *****************
********************** Investment Income and RRSP Contributions ****************

if (`figureH1') {

	cap log close
	log using "$dir_log\RESTAT_Figure_H1.log", replace

	* Purpose: yearly means, standard errors and disclosure checks for
	* xdiv_i, invi_i and rrspci (presumably dividends, investment income and
	* RRSP contributions, per the section header), saved as .dta and .xlsx.

	* Stack the two capital-income files, keeping only the series used below
	* plus the weight. The investment-income variable is written out in full
	* as invi_i, the name used by every later loop, instead of relying on the
	* abbreviation invi being resolved by Stata.
	clear
	use "$dir_data\capital_income_final_8299.dta"
	keep year xdiv_i invi_i rrspci wgt2_i
	append using "$dir_data\capital_income_final_0016.dta"
	keep year xdiv_i invi_i rrspci wgt2_i


	* Create Conditional on Positive Values
	* (missing when the amount is zero, negative or missing)
	foreach var in xdiv_i invi_i rrspci {
		gen pos_`var' = `var' if `var' > 0
	}


	* Create Dummy Variables
	* Stata ranks missing above every number, so a bare (x > 0) would code a
	* missing amount as 1. The if-qualifier leaves the dummy missing instead.
	foreach var in xdiv_i invi_i rrspci {
		gen `var'_dummy = (`var' > 0) if !missing(`var')
	}

	* Create Weighted Values
	* Each _wgt variable is the plain product of the value and wgt2_i.
	foreach var in xdiv_i pos_xdiv_i xdiv_i_dummy invi_i pos_invi_i invi_i_dummy rrspci pos_rrspci rrspci_dummy {
		gen `var'_wgt = `var'*wgt2_i
	}

	*  DOMINANCE TEST
	* max1 is the largest value within the year. temp is used to find the
	* second and third largest, which are dropped again at the end of the loop.
	* NOTE(review): the collapse below defines the sum_ columns as the plain
	* yearly total of each series, so the top-3-excluded sum_ variable built
	* here does not reach the output. Confirm this is intended.
	foreach var in xdiv_i pos_xdiv_i invi_i pos_invi_i rrspci pos_rrspci {

		bysort year: egen max1_`var'=max(`var')
		gen temp=`var' if `var'!=max1_`var'
		bysort year: egen max2_`var'=max(temp)
		replace temp=.
		replace temp=`var' if `var'!=max1_`var' & `var'!=max2_`var'
		bysort year: egen max3_`var'=max(temp)
		gen sum_`var'=`var' if `var'!=max1_`var' & `var'!=max2_`var' & `var'!=max3_`var'
		drop temp max2_`var' max3_`var'
	}

	* Constant 1 per record, so its sum is the number of records in the year.
	gen clkgxi_count = 1


	* Collapse to one row per year:
	*   record count, means and standard errors of the raw series,
	*   means and standard errors of the _wgt series,
	*   the yearly maximum (max1 is constant within year) and the yearly total.
	collapse (sum) clkgxi_count ///
		(mean) xdiv_i xdiv_i_dummy pos_xdiv_i invi_i invi_i_dummy pos_invi_i rrspci rrspci_dummy pos_rrspci ///
		(semean) se_xdiv_i=xdiv_i se_xdiv_i_dummy=xdiv_i_dummy se_pos_xdiv_i=pos_xdiv_i se_invi_i=invi_i se_invi_i_dummy=invi_i_dummy se_pos_invi_i=pos_invi_i se_rrspci=rrspci se_rrspci_dummy=rrspci_dummy se_pos_rrspci=pos_rrspci ///
		(mean) xdiv_i_wgt xdiv_i_dummy_wgt pos_xdiv_i_wgt invi_i_wgt invi_i_dummy_wgt pos_invi_i_wgt rrspci_wgt rrspci_dummy_wgt pos_rrspci_wgt ///
		(semean) se_xdiv_i_wgt=xdiv_i_wgt se_xdiv_i_dummy_wgt=xdiv_i_dummy_wgt se_pos_xdiv_i_wgt=pos_xdiv_i_wgt se_invi_i_wgt=invi_i_wgt se_invi_i_dummy_wgt=invi_i_dummy_wgt se_pos_invi_i_wgt=pos_invi_i_wgt se_rrspci_wgt=rrspci_wgt se_rrspci_dummy_wgt=rrspci_dummy_wgt se_pos_rrspci_wgt=pos_rrspci_wgt ///
		(mean) max1_xdiv_i max1_pos_xdiv_i max1_invi_i max1_pos_invi_i max1_rrspci max1_pos_rrspci ///
		(sum) sum_xdiv_i=xdiv_i sum_pos_xdiv_i=pos_xdiv_i sum_invi_i=invi_i sum_pos_invi_i=pos_invi_i sum_rrspci=rrspci sum_pos_rrspci=pos_rrspci, ///
		by(year)


	* PASS DOMINANCE TEST
	* rvalue is the share of the yearly total held by the largest record;
	* domtest is 1 when that share exceeds 0.8.
	foreach var in xdiv_i pos_xdiv_i invi_i pos_invi_i rrspci pos_rrspci {
		gen domtest_`var'=(max1_`var'/sum_`var'>0.8)
		gen rvalue_`var'=max1_`var'/sum_`var'
	}


	* ROUND AMOUNTS: nearest 10 for values up to 1000, nearest 100 above that
	foreach var in xdiv_i_wgt pos_xdiv_i_wgt invi_i_wgt pos_invi_i_wgt rrspci_wgt pos_rrspci_wgt {
		gen r`var'=.
		replace r`var'=round(`var',10) if `var'<=1000
		replace r`var'=round(`var',100) if `var'>1000
	}


	save "$dir_results\RESTAT_Figure_H1.dta", replace

	export excel using "$dir_results\RESTAT_Figure_H1.xlsx", firstrow(variables) replace

}


******************* Figure H4: Substitution Responses for Other ***************
******************* Capital Income (Total/Unconditional Resp.)  ****************

if (`figureH4') {

	cap log close
	log using "$dir_log\RESTAT_Figure_H4.log", replace

	* Each outcome gets its own full pass: load, estimate by group, export.
	local outcomes "ihs_xdiv_i ihs_invi_i ihs_rrspci"
	foreach v in `outcomes' {

		*** UNCONDITIONAL LOGS REGRESSIONS ***

		* Estimation sample: years 1990 to 1999, no loss flag, group 6 excluded
		use "$dir_data/data_sample_DD_unconditional.dta", clear
		keep if inrange(year, 1990, 1999)
		drop if flag_losses == 1 | cg_8593 == 6

		* Attach the other capital-income series and discard using-only rows
		merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(invi_i rrspci xdiv_i)
		drop if _merge == 2
		drop _merge

		* Inverse hyperbolic sine of each series
		foreach src in invi_i xdiv_i rrspci {
			gen ihs_`src' = log(`src' + sqrt(1 + `src'^2))
		}

		* set as panel data
		sort lin__i year
		xtset lin__i year

		* Event-time indicators with prefix _ ; the omit characteristic is set to -1
		char T_event[omit] -1
		xi i.T_event, pref(_)

		* set controls
		set matsize 10000
		local demographics age age2 age3  i.num_sxco_i i.tnkidi
		local other i.province

		* Empty placeholder columns, all missing at this point.
		* NOTE(review): presumably filled in by the DiffnDiff_Save do-file run
		* after each regression; that file is not visible here.
		foreach slot in cg_8593_2 version num_reports_2 N {
			gen `slot'=.
		}
		forvalues k=1/10 {
			gen b`k'=.
			gen se`k'=.
		}
		foreach slot in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
			gen `slot'=.
		}

		local groups "3 4 5"

		global i=1

		foreach g in `groups' {

			* Group id, 1993 means and group size, published as globals
			global cg_8593=`g'
			quietly summarize `v' if cg_8593==`g' & year==1993
			global ave_`v' =`r(mean)'
			quietly summarize mtr if cg_8593==`g' & year==1993
			global mtr=`r(mean)'
			quietly summarize mtr_zero if cg_8593==`g' & year==1993
			global mtr_zero=`r(mean)'
			display $mtr_zero
			quietly summarize cg_8593 if cg_8593==`g'
			global N_cg_8593=`r(N)'

			* Event-time indicators 1-3 and 5-10 (4 is left out) plus group and year effects
			global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

			display " ALTOGETHER"
			global version=2

			* Current group against group 2, standard errors clustered on lin__i
			reg `v'  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
			do "$dir_do/DiffnDiff_Save_`v'.do"

			* NOTE(review): drops rows of the group just estimated from
			* observation 1500 onward; presumably to shrink the data while
			* leaving the first rows untouched. Confirm.
			drop if cg_8593==`g' & _n>=1500

		}

		* Keep only the result columns and write them to disk
		keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr
		order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr version b* se*
		save "$dir_results\RESTAT_Figure_H4_`v'_unconditional.dta", replace

		* Reload the saved results and reshape to one row per group and year
		use "$dir_results\RESTAT_Figure_H4_`v'_unconditional.dta", clear
		* Coefficient 4 is the omitted event time, so it is set to zero
		replace b4 = 0
		drop if cg_8593_2==.
		reshape long   b se , i(version N ave_mtr_zero ave_mtr cg_8593_2) j(year)
		replace se = 0 if se == .

		* 95 percent bounds; index 1 to 10 becomes calendar year 1990 to 1999
		gen lb=b-1.96*se
		gen ub=b+1.96*se
		replace year = year + 1989

		* Counts rounded to the nearest 5 before export
		gen N_round = round(N,5)
		gen N_cg_8593_round = round(N_cg_8593,5)
		export excel "$dir_results\RESTAT_Figure_H4_`v'_unconditional.xlsx", firstrow(variables) replace

	}

}



******************* Figure H3: Substitution Responses for Other ****************
******************* Capital Income (Intensive Margin Resp.)    *****************

if (`figureH3') {

	cap log close
	log using "$dir_log\RESTAT_Figure_H3.log", replace

	* Each outcome gets its own full pass: load, estimate by group, export.
	local outcomes "log_xdiv_i log_invi_i log_rrspci"
	foreach v in `outcomes' {

		*** INTENSIVE MARGIN LOGS REGRESSIONS ***

		* Estimation sample: years 1990 to 1999, no loss flag, group 6 excluded
		use "$dir_data/data_sample_DD_unconditional.dta", clear
		keep if inrange(year, 1990, 1999)
		drop if flag_losses == 1 | cg_8593 == 6

		* Attach the other capital-income series and discard using-only rows
		merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(invi_i rrspci xdiv_i)
		drop if _merge == 2
		drop _merge

		* Natural log of each series, defined only for strictly positive amounts
		foreach src in invi_i xdiv_i rrspci {
			gen log_`src' = log(`src') if `src' > 0
		}

		* set as panel data
		sort lin__i year
		xtset lin__i year

		* Event-time indicators with prefix _ ; the omit characteristic is set to -1
		char T_event[omit] -1
		xi i.T_event, pref(_)

		* set controls
		set matsize 10000
		local demographics age age2 age3  i.num_sxco_i i.tnkidi
		local other i.province

		* Empty placeholder columns, all missing at this point.
		* NOTE(review): presumably filled in by the DiffnDiff_Save do-file run
		* after each regression; that file is not visible here.
		foreach slot in cg_8593_2 version num_reports_2 N {
			gen `slot'=.
		}
		forvalues k=1/10 {
			gen b`k'=.
			gen se`k'=.
		}
		foreach slot in N_cg_8593 ave_mtr_zero ave_mtr ave_clkgxi {
			gen `slot'=.
		}

		local groups "3 4 5"

		global i=1

		foreach g in `groups' {

			* Group id, 1993 means and group size, published as globals
			global cg_8593=`g'
			quietly summarize `v' if cg_8593==`g' & year==1993
			global ave_`v' =`r(mean)'
			quietly summarize mtr if cg_8593==`g' & year==1993
			global mtr=`r(mean)'
			quietly summarize mtr_zero if cg_8593==`g' & year==1993
			global mtr_zero=`r(mean)'
			display $mtr_zero
			quietly summarize cg_8593 if cg_8593==`g'
			global N_cg_8593=`r(N)'

			* Event-time indicators 1-3 and 5-10 (4 is left out) plus group and year effects
			global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

			display " ALTOGETHER"
			global version=2

			* Current group against group 2, standard errors clustered on lin__i
			reg `v'  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
			do "$dir_do/DiffnDiff_Save_`v'.do"

			* NOTE(review): drops rows of the group just estimated from
			* observation 1500 onward; presumably to shrink the data while
			* leaving the first rows untouched. Confirm.
			drop if cg_8593==`g' & _n>=1500

		}

		* Keep only the result columns and write them to disk
		keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr
		order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr version b* se*
		save "$dir_results\RESTAT_Figure_H3_`v'_intensive.dta", replace

		* Reload the saved results and reshape to one row per group and year
		use "$dir_results\RESTAT_Figure_H3_`v'_intensive.dta", clear
		* Coefficient 4 is the omitted event time, so it is set to zero
		replace b4 = 0
		drop if cg_8593_2==.
		reshape long   b se , i(version N ave_mtr_zero ave_mtr cg_8593_2) j(year)
		replace se = 0 if se == .

		* 95 percent bounds; index 1 to 10 becomes calendar year 1990 to 1999
		gen lb=b-1.96*se
		gen ub=b+1.96*se
		replace year = year + 1989

		* Counts rounded to the nearest 5 before export
		gen N_round = round(N,5)
		gen N_cg_8593_round = round(N_cg_8593,5)
		export excel "$dir_results\RESTAT_Figure_H3_`v'_intensive.xlsx", firstrow(variables) replace

	}

}


******************* Figure H2: Substitution Responses for Other ***************
******************* Capital Income (Extensive Margin Resp.)    ****************

if (`figureH2') {

	cap log close
	log using "$dir_log\RESTAT_Figure_H2.log", replace

	* Purpose: for each positive-amount indicator, run one regression per
	* group (3, 4, 5, each against group 2) and export the event-time
	* coefficients with 95 percent bounds to .dta and .xlsx.

	local outcomes "xdiv_i_dummy invi_i_dummy rrspci_dummy"
	foreach v in `outcomes' {
		*** EXTENSIVE MARGIN REGRESSIONS ***
		clear
		use "$dir_data/data_sample_DD_unconditional.dta"
		drop if year < 1990
		drop if year > 1999
		drop if flag_losses == 1
		drop if cg_8593 == 6

		* Attach the other capital-income series and discard using-only rows.
		* Master-only rows stay in memory with missing amounts.
		merge 1:1 lin__i year using "$dir_data\capital_income_final_8299.dta", keepusing(invi_i rrspci xdiv_i)
		drop if _merge == 2

		* Indicator for a strictly positive amount.
		* Stata ranks missing above every number, so testing x > 0 alone would
		* code a missing amount as 1. The if-qualifier leaves the indicator
		* missing instead, which keeps such rows out of the regression.
		foreach src in invi_i xdiv_i rrspci {
			gen `src'_dummy = (`src' > 0) if !missing(`src')
		}

		drop _merge

		* set as panel data
		sort lin__i year
		xtset lin__i year

		* Event-time indicators with prefix _ ; the omit characteristic is set to -1
		char T_event[omit] -1
		xi i.T_event, pref(_)

		* set controls
		set matsize 10000
		local demographics age age2 age3  i.num_sxco_i i.tnkidi
		local other i.province

		* Empty placeholder columns, all missing at this point.
		* NOTE(review): presumably filled in by the DiffnDiff_Save do-file run
		* after each regression; that file is not visible here.
		gen cg_8593_2=.
		gen version=.
		gen num_reports_2=.
		gen N=.
		forvalues y=1(1)10 {
			gen b`y'=.
			gen se`y'=.
		}
		gen N_cg_8593=.
		gen ave_mtr_zero=.
		gen ave_mtr=.


		local groups "3 4 5"

		global i=1

		foreach g in `groups' {

			* Group id, 1993 means and group size, published as globals
			global cg_8593=`g'
			qui sum `v' if cg_8593==`g' & year==1993
			global ave_`v' =`r(mean)'
			qui sum mtr if cg_8593==`g' & year==1993
			global mtr=`r(mean)'
			qui sum mtr_zero if cg_8593==`g' & year==1993
			global mtr_zero=`r(mean)'
			di $mtr_zero
			qui sum cg_8593 if cg_8593==`g'
			global N_cg_8593=`r(N)'

			* Event-time indicators 1-3 and 5-10 (4 is left out) plus group and year effects
			global main_independent _T_event_1 _T_event_2 _T_event_3 _T_event_5 _T_event_6 _T_event_7 _T_event_8 _T_event_9 _T_event_10 i.cg_8593 i.year

			di  " ALTOGETHER"
			global version=2

			* Current group against group 2, standard errors clustered on lin__i
			reg `v'  ${main_independent} `demographics' `other'  if (cg_8593==`g' | cg_8593 == 2), vce(cluster lin__i)
			do "$dir_do/DiffnDiff_Save_`v'.do"

			* NOTE(review): drops rows of the group just estimated from
			* observation 1500 onward; presumably to shrink the data while
			* leaving the first rows untouched. Confirm.
			drop if cg_8593==`g' & _n>=1500

		}


		* Keep only the result columns and write them to disk
		keep N N_cg_8593 cg_8593_2 version b* se* ave_mtr_zero ave_mtr
		order N N_cg_8593 cg_8593_2 ave_mtr_zero ave_mtr version b* se*
		save "$dir_results\RESTAT_Figure_H2_`v'_extensive.dta", replace


		* Reload the saved results and reshape to one row per group and year
		clear
		use "$dir_results\RESTAT_Figure_H2_`v'_extensive.dta"
		* Coefficient 4 is the omitted event time, so it is set to zero
		replace b4 = 0
		drop if cg_8593_2==.
		reshape long   b se , i(version N ave_mtr_zero ave_mtr cg_8593_2) j(year)
		replace se = 0 if se == .

		* 95 percent bounds; index 1 to 10 becomes calendar year 1990 to 1999
		gen lb=b-1.96*se
		gen ub=b+1.96*se
		replace year = 1989 + year


		* Counts rounded to the nearest 5 before export
		gen N_round = round(N,5)
		gen N_cg_8593_round = round(N_cg_8593,5)
		export excel "$dir_results\RESTAT_Figure_H2_`v'_extensive.xlsx", firstrow(variables) replace

	}

}
